From b376e37a65ebd43fac9b8007c9f30dbd6c790fa3 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sat, 5 Feb 2022 12:42:45 -0700 Subject: [PATCH 01/10] Implement Unicode Case Folding This commit adds CaseFoldedString as a partner to CIString. A CaseFoldedString is case folded according to the Unicode rules for Caseless Matching. In contrast to CIString, it does _not_ keep a reference to the input `String`. This commit changes CIString to be based on CaseFoldedString. --- .../ci/bench/CaseFoldedStringBench.scala | 44 + build.sbt | 7 +- .../scala/org/typelevel/ci/CIString.scala | 66 +- .../org/typelevel/ci/CaseFoldedString.scala | 161 ++ .../scala/org/typelevel/ci/CaseFolds.scala | 1550 +++++++++++++++++ .../typelevel/ci/testing/arbitraries.scala | 68 +- .../ci/CaseFoldedStringJVMSuite.scala | 25 + .../org/typelevel/ci/CIStringSuite.scala | 60 +- .../typelevel/ci/CaseFoldedStringSuite.scala | 190 ++ 9 files changed, 2128 insertions(+), 43 deletions(-) create mode 100644 bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CaseFolds.scala create mode 100644 tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala create mode 100644 tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala diff --git a/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala b/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala new file mode 100644 index 0000000..082a1ac --- /dev/null +++ b/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala @@ -0,0 +1,44 @@ +package org.typelevel.ci +package bench + +import org.scalacheck._ +import org.typelevel.ci.testing.arbitraries._ +import cats._ +import org.openjdk.jmh.annotations._ +import java.util.concurrent.TimeUnit + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.Throughput, Mode.AverageTime)) 
+@OutputTimeUnit(TimeUnit.MILLISECONDS) +class CaseFoldedStringBench { + + var currentSeed: Long = Long.MinValue + + def nextSeed: Long = { + val seed = currentSeed + currentSeed += 1L + seed + } + + def nextString: String = + Arbitrary.arbitrary[String].apply(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate String.")) + + def nextListOfString: List[String] = + Gen.listOf(Arbitrary.arbitrary[String])(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate String.")) + + @Benchmark + def caseFoldedStringHash: Int = + CaseFoldedString(nextString).hashCode + + @Benchmark + def caseFoldedStringFoldMap: CaseFoldedString = + Foldable[List].foldMap(nextListOfString)(CaseFoldedString.apply) + + @Benchmark + def stringHash: Int = + nextString.hashCode + + @Benchmark + def stringFoldMap: String = + Foldable[List].foldMap(nextListOfString)(identity) +} diff --git a/build.sbt b/build.sbt index a3293f0..0f7204b 100644 --- a/build.sbt +++ b/build.sbt @@ -71,9 +71,12 @@ lazy val bench = project .enablePlugins(NoPublishPlugin) .enablePlugins(JmhPlugin) .settings( - name := "case-insensitive-bench" + name := "case-insensitive-bench", + libraryDependencies ++= List( + "org.scalacheck" %% "scalacheck" % scalacheckV + ) ) - .dependsOn(core.jvm) + .dependsOn(core.jvm, testing.jvm) lazy val docs = project .in(file("site")) diff --git a/core/src/main/scala/org/typelevel/ci/CIString.scala b/core/src/main/scala/org/typelevel/ci/CIString.scala index 13710be..6b9e339 100644 --- a/core/src/main/scala/org/typelevel/ci/CIString.scala +++ b/core/src/main/scala/org/typelevel/ci/CIString.scala @@ -24,52 +24,46 @@ import scala.math.Ordered /** A case-insensitive String. * - * Two CI strings are equal if and only if they are the same length, and each corresponding - * character is equal after calling either `toUpper` or `toLower`. 
+ * Comparisons are based on the case folded representation of the `String` + * as defined by the Unicode standard. See [[CaseFoldedString]] for a full + * discussion on those rules. * - * Ordering is based on a string comparison after folding each character to uppercase and then back - * to lowercase. - * - * All comparisons are insensitive to locales. + * @note This class differs from [[CaseFoldedString]] in that it keeps a + * reference to the original input `String` in whatever form it was + * given. This makes [[CIString]] useful if you wish to perform case + * insensitive operations on a `String`, but then recover the original, + * unaltered form. If you do not care about the original input form, + * and just want a single case insensitive `String` value, then + * [[CaseFoldedString]] is more efficient and you should consider using + * that directly. * * @param toString * The original value the CI String was constructed with. */ -final class CIString private (override val toString: String) +final class CIString private (override val toString: String, val asCaseFoldedString: CaseFoldedString) extends Ordered[CIString] with Serializable { + + @deprecated(message = "Please provide a CaseFoldedString directly.", since = "1.3.0") + private def this(toString: String) = { + this(toString, CaseFoldedString(toString)) + } + override def equals(that: Any): Boolean = that match { case that: CIString => - this.toString.equalsIgnoreCase(that.toString) + // Note java.lang.String.equalsIgnoreCase _does not_ handle all title + // case unicode characters, so we can't use it here. See the tests for + // an example. 
+ this.asCaseFoldedString == that.asCaseFoldedString case _ => false } - @transient private[this] var hash = 0 - override def hashCode(): Int = { - if (hash == 0) - hash = calculateHash - hash - } - - private[this] def calculateHash: Int = { - var h = 17 - var i = 0 - val len = toString.length - while (i < len) { - // Strings are equal igoring case if either their uppercase or lowercase - // forms are equal. Equality of one does not imply the other, so we need - // to go in both directions. A character is not guaranteed to make this - // round trip, but it doesn't matter as long as all equal characters - // hash the same. - h = h * 31 + toString.charAt(i).toUpper.toLower - i += 1 - } - h - } + override def hashCode(): Int = + asCaseFoldedString.hashCode override def compare(that: CIString): Int = - this.toString.compareToIgnoreCase(that.toString) + /* CaseFoldedString declares no compare member; order by the case folded text. */ asCaseFoldedString.toString.compareTo(that.asCaseFoldedString.toString) def transform(f: String => String): CIString = CIString(f(toString)) @@ -87,7 +81,15 @@ final class CIString private (override val toString: String) @suppressUnusedImportWarningForCompat object CIString { - def apply(value: String): CIString = new CIString(value) + + def apply(value: String, useTurkicFolding: Boolean): CIString = + new CIString(value, CaseFoldedString(value, useTurkicFolding)) + + def apply(value: String): CIString = + apply(value, false) + + def fromCaseFoldedString(value: CaseFoldedString): CIString = + new CIString(value.toString, value) val empty = CIString("") diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala new file mode 100644 index 0000000..49c9d75 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala @@ -0,0 +1,161 @@ +package org.typelevel.ci + +import cats._ +import cats.kernel.LowerBounded +import org.typelevel.ci.compat._ +import scala.annotation.tailrec + +/** A case folded `String`. 
This is a `String` which has been converted into a + * state which is suitable for case insensitive matching under the Unicode + * standard. + * + * This type differs from [[CIString]] in that it does ''not'' retain the + * original input `String` value. That is, this is a destructive + * transformation. You should use [[CaseFoldedString]] instead of + * [[CIString]] when you only want the case insensitive `String` and you + * never want to return the `String` back into the input value. In such cases + * [[CaseFoldedString]] will be more efficient than [[CIString]] as it only + * has to keep around a single `String` in memory. + * + * Case insensitive `String` values under Unicode are not always intuitive, + * especially on the JVM. There are three character cases to consider, lower + * case, upper case, and title case, and not all Unicode codePoints have all + * 3, some only have 2, some only 1. For some codePoints, the JRE standard + * operations don't always work as you'd expect. + * + * {{{ + * scala> val codePoint: Int = 8093 + * val codePoint: Int = 8093 + * + * scala> new String(Character.toChars(codePoint)) * val res0: String = ᾝ + * + * scala> res0.toUpperCase + * val res1: String = ἭΙ + * + * scala> res0.toUpperCase.toLowerCase == res0.toLowerCase + * val res2: Boolean = false + * + * scala> Character.getName(res0.head) + * val res3: String = GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + * + * scala> res0.toUpperCase.toLowerCase.equalsIgnoreCase(res0.toLowerCase) + * val res4: Boolean = false + * }}} + * + * In this example, given the Unicode character \u1f9d, converting it to + * upper case, then to lower case, is not equal under normal String + * equality. `String.equalsIgnoreCase` also does not work correctly by the + * Unicode standard. + * + * Making matters more complicated, for certain Turkic languages, the case + * folding rules change. See the Unicode standard for a full discussion of + * the topic. 
+ * + * @note For most `String` values the `toString` form of this is lower case + * (when the given character has more than one case), but this is not + * always the case. Certain Unicode scripts have exceptions to this and + * will be case folded into upper case. If you want/need a lower + * case only `String`, you should call `.toString.toLowerCase`. + * + * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790]] + */ +final case class CaseFoldedString private (override val toString: String) extends AnyVal { + + def isEmpty: Boolean = toString.isEmpty + + def nonEmpty: Boolean = !isEmpty + + def length: Int = toString.length + + def size: Int = length + + def trim: CaseFoldedString = + CaseFoldedString(toString.trim) + + private final def copy(toString: String): CaseFoldedString = + CaseFoldedString(toString) +} + +object CaseFoldedString { + + /** Create a [[CaseFoldedString]] from a `String`. + * + * @param turkicFoldingRules if `true`, use the case folding rules + * applicable to some Turkic languages. + */ + def apply(value: String, turkicFoldingRules: Boolean): CaseFoldedString = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + val foldCodePoint: Int => Array[Int] = + if (turkicFoldingRules) { + CaseFolds.turkicFullCaseFoldedCodePoints + } else { + CaseFolds.fullCaseFoldedCodePoints + } + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + foldCodePoint(codePoint).foreach(c => builder.appendCodePoint(c)) + val inc: Int = if (codePoint >= 0x10000) 2 else 1 + loop(index + inc) + } + + new CaseFoldedString(loop(0)) + } + + /** Create a [[CaseFoldedString]] from a `String`. + * + * @note This factory method does ''not'' use the Turkic case folding + * rules. For the majority of languages this is the correct method of + * case folding. 
If you know your `String` is specific to one of the + * Turkic languages which use special case folding rules, you can use + * the secondary factory method to enable case folding under those + * rules. + */ + def apply(value: String): CaseFoldedString = + apply(value, false) + + val empty: CaseFoldedString = + CaseFoldedString("") + + implicit val hashAndOrderForCaseFoldedString: Hash[CaseFoldedString] with Order[CaseFoldedString] = + new Hash[CaseFoldedString] with Order[CaseFoldedString] { + override def hash(x: CaseFoldedString): Int = + x.hashCode + + override def compare(x: CaseFoldedString, y: CaseFoldedString): Int = + /* CaseFoldedString declares no compare member; order by the case folded text. */ x.toString.compareTo(y.toString) + } + + implicit val orderingForCaseFoldedString: Ordering[CaseFoldedString] = + hashAndOrderForCaseFoldedString.toOrdering + + implicit val showForCaseFoldedString: Show[CaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForCaseFoldedString: LowerBounded[CaseFoldedString] = + new LowerBounded[CaseFoldedString] { + override val partialOrder: PartialOrder[CaseFoldedString] = + hashAndOrderForCaseFoldedString + + override val minBound: CaseFoldedString = + empty + } + + implicit val monoidForCaseFoldedString: Monoid[CaseFoldedString] = + new Monoid[CaseFoldedString] { + override val empty: CaseFoldedString = CaseFoldedString.empty + + override def combine(x: CaseFoldedString, y: CaseFoldedString): CaseFoldedString = + new CaseFoldedString(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[CaseFoldedString]): CaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + new CaseFoldedString(sb.toString) + } + } +} diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolds.scala b/core/src/main/scala/org/typelevel/ci/CaseFolds.scala new file mode 100644 index 0000000..66ce86d --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CaseFolds.scala @@ -0,0 +1,1550 @@ +package org.typelevel.ci + +import java.util.Locale +import 
scala.annotation.tailrec + +private[ci] object CaseFolds { + + def turkicFullCaseFoldedCodePoints(codePoint: Int): Array[Int] = + codePoint match { + case 0x0049 => Array(0x0131) // LATIN CAPITAL LETTER I + case 0x0130 => Array(0x0069) // LATIN CAPITAL LETTER I WITH DOT ABOVE + case _ => + fullCaseFoldedCodePoints(codePoint) + } + + def fullCaseFoldedCodePoints(codePoint: Int): Array[Int] = + codePoint match { + case 0x0041 => Array(0x0061) // LATIN CAPITAL LETTER A + case 0x0042 => Array(0x0062) // LATIN CAPITAL LETTER B + case 0x0043 => Array(0x0063) // LATIN CAPITAL LETTER C + case 0x0044 => Array(0x0064) // LATIN CAPITAL LETTER D + case 0x0045 => Array(0x0065) // LATIN CAPITAL LETTER E + case 0x0046 => Array(0x0066) // LATIN CAPITAL LETTER F + case 0x0047 => Array(0x0067) // LATIN CAPITAL LETTER G + case 0x0048 => Array(0x0068) // LATIN CAPITAL LETTER H + case 0x0049 => Array(0x0069) // LATIN CAPITAL LETTER I + case 0x004A => Array(0x006A) // LATIN CAPITAL LETTER J + case 0x004B => Array(0x006B) // LATIN CAPITAL LETTER K + case 0x004C => Array(0x006C) // LATIN CAPITAL LETTER L + case 0x004D => Array(0x006D) // LATIN CAPITAL LETTER M + case 0x004E => Array(0x006E) // LATIN CAPITAL LETTER N + case 0x004F => Array(0x006F) // LATIN CAPITAL LETTER O + case 0x0050 => Array(0x0070) // LATIN CAPITAL LETTER P + case 0x0051 => Array(0x0071) // LATIN CAPITAL LETTER Q + case 0x0052 => Array(0x0072) // LATIN CAPITAL LETTER R + case 0x0053 => Array(0x0073) // LATIN CAPITAL LETTER S + case 0x0054 => Array(0x0074) // LATIN CAPITAL LETTER T + case 0x0055 => Array(0x0075) // LATIN CAPITAL LETTER U + case 0x0056 => Array(0x0076) // LATIN CAPITAL LETTER V + case 0x0057 => Array(0x0077) // LATIN CAPITAL LETTER W + case 0x0058 => Array(0x0078) // LATIN CAPITAL LETTER X + case 0x0059 => Array(0x0079) // LATIN CAPITAL LETTER Y + case 0x005A => Array(0x007A) // LATIN CAPITAL LETTER Z + case 0x00B5 => Array(0x03BC) // MICRO SIGN + case 0x00C0 => Array(0x00E0) // LATIN CAPITAL LETTER A 
WITH GRAVE + case 0x00C1 => Array(0x00E1) // LATIN CAPITAL LETTER A WITH ACUTE + case 0x00C2 => Array(0x00E2) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + case 0x00C3 => Array(0x00E3) // LATIN CAPITAL LETTER A WITH TILDE + case 0x00C4 => Array(0x00E4) // LATIN CAPITAL LETTER A WITH DIAERESIS + case 0x00C5 => Array(0x00E5) // LATIN CAPITAL LETTER A WITH RING ABOVE + case 0x00C6 => Array(0x00E6) // LATIN CAPITAL LETTER AE + case 0x00C7 => Array(0x00E7) // LATIN CAPITAL LETTER C WITH CEDILLA + case 0x00C8 => Array(0x00E8) // LATIN CAPITAL LETTER E WITH GRAVE + case 0x00C9 => Array(0x00E9) // LATIN CAPITAL LETTER E WITH ACUTE + case 0x00CA => Array(0x00EA) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + case 0x00CB => Array(0x00EB) // LATIN CAPITAL LETTER E WITH DIAERESIS + case 0x00CC => Array(0x00EC) // LATIN CAPITAL LETTER I WITH GRAVE + case 0x00CD => Array(0x00ED) // LATIN CAPITAL LETTER I WITH ACUTE + case 0x00CE => Array(0x00EE) // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + case 0x00CF => Array(0x00EF) // LATIN CAPITAL LETTER I WITH DIAERESIS + case 0x00D0 => Array(0x00F0) // LATIN CAPITAL LETTER ETH + case 0x00D1 => Array(0x00F1) // LATIN CAPITAL LETTER N WITH TILDE + case 0x00D2 => Array(0x00F2) // LATIN CAPITAL LETTER O WITH GRAVE + case 0x00D3 => Array(0x00F3) // LATIN CAPITAL LETTER O WITH ACUTE + case 0x00D4 => Array(0x00F4) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + case 0x00D5 => Array(0x00F5) // LATIN CAPITAL LETTER O WITH TILDE + case 0x00D6 => Array(0x00F6) // LATIN CAPITAL LETTER O WITH DIAERESIS + case 0x00D8 => Array(0x00F8) // LATIN CAPITAL LETTER O WITH STROKE + case 0x00D9 => Array(0x00F9) // LATIN CAPITAL LETTER U WITH GRAVE + case 0x00DA => Array(0x00FA) // LATIN CAPITAL LETTER U WITH ACUTE + case 0x00DB => Array(0x00FB) // LATIN CAPITAL LETTER U WITH CIRCUMFLEX + case 0x00DC => Array(0x00FC) // LATIN CAPITAL LETTER U WITH DIAERESIS + case 0x00DD => Array(0x00FD) // LATIN CAPITAL LETTER Y WITH ACUTE + case 0x00DE => Array(0x00FE) // LATIN CAPITAL 
LETTER THORN + case 0x0100 => Array(0x0101) // LATIN CAPITAL LETTER A WITH MACRON + case 0x0102 => Array(0x0103) // LATIN CAPITAL LETTER A WITH BREVE + case 0x0104 => Array(0x0105) // LATIN CAPITAL LETTER A WITH OGONEK + case 0x0106 => Array(0x0107) // LATIN CAPITAL LETTER C WITH ACUTE + case 0x0108 => Array(0x0109) // LATIN CAPITAL LETTER C WITH CIRCUMFLEX + case 0x010A => Array(0x010B) // LATIN CAPITAL LETTER C WITH DOT ABOVE + case 0x010C => Array(0x010D) // LATIN CAPITAL LETTER C WITH CARON + case 0x010E => Array(0x010F) // LATIN CAPITAL LETTER D WITH CARON + case 0x0110 => Array(0x0111) // LATIN CAPITAL LETTER D WITH STROKE + case 0x0112 => Array(0x0113) // LATIN CAPITAL LETTER E WITH MACRON + case 0x0114 => Array(0x0115) // LATIN CAPITAL LETTER E WITH BREVE + case 0x0116 => Array(0x0117) // LATIN CAPITAL LETTER E WITH DOT ABOVE + case 0x0118 => Array(0x0119) // LATIN CAPITAL LETTER E WITH OGONEK + case 0x011A => Array(0x011B) // LATIN CAPITAL LETTER E WITH CARON + case 0x011C => Array(0x011D) // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + case 0x011E => Array(0x011F) // LATIN CAPITAL LETTER G WITH BREVE + case 0x0120 => Array(0x0121) // LATIN CAPITAL LETTER G WITH DOT ABOVE + case 0x0122 => Array(0x0123) // LATIN CAPITAL LETTER G WITH CEDILLA + case 0x0124 => Array(0x0125) // LATIN CAPITAL LETTER H WITH CIRCUMFLEX + case 0x0126 => Array(0x0127) // LATIN CAPITAL LETTER H WITH STROKE + case 0x0128 => Array(0x0129) // LATIN CAPITAL LETTER I WITH TILDE + case 0x012A => Array(0x012B) // LATIN CAPITAL LETTER I WITH MACRON + case 0x012C => Array(0x012D) // LATIN CAPITAL LETTER I WITH BREVE + case 0x012E => Array(0x012F) // LATIN CAPITAL LETTER I WITH OGONEK + case 0x0132 => Array(0x0133) // LATIN CAPITAL LIGATURE IJ + case 0x0134 => Array(0x0135) // LATIN CAPITAL LETTER J WITH CIRCUMFLEX + case 0x0136 => Array(0x0137) // LATIN CAPITAL LETTER K WITH CEDILLA + case 0x0139 => Array(0x013A) // LATIN CAPITAL LETTER L WITH ACUTE + case 0x013B => Array(0x013C) // LATIN 
CAPITAL LETTER L WITH CEDILLA + case 0x013D => Array(0x013E) // LATIN CAPITAL LETTER L WITH CARON + case 0x013F => Array(0x0140) // LATIN CAPITAL LETTER L WITH MIDDLE DOT + case 0x0141 => Array(0x0142) // LATIN CAPITAL LETTER L WITH STROKE + case 0x0143 => Array(0x0144) // LATIN CAPITAL LETTER N WITH ACUTE + case 0x0145 => Array(0x0146) // LATIN CAPITAL LETTER N WITH CEDILLA + case 0x0147 => Array(0x0148) // LATIN CAPITAL LETTER N WITH CARON + case 0x014A => Array(0x014B) // LATIN CAPITAL LETTER ENG + case 0x014C => Array(0x014D) // LATIN CAPITAL LETTER O WITH MACRON + case 0x014E => Array(0x014F) // LATIN CAPITAL LETTER O WITH BREVE + case 0x0150 => Array(0x0151) // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + case 0x0152 => Array(0x0153) // LATIN CAPITAL LIGATURE OE + case 0x0154 => Array(0x0155) // LATIN CAPITAL LETTER R WITH ACUTE + case 0x0156 => Array(0x0157) // LATIN CAPITAL LETTER R WITH CEDILLA + case 0x0158 => Array(0x0159) // LATIN CAPITAL LETTER R WITH CARON + case 0x015A => Array(0x015B) // LATIN CAPITAL LETTER S WITH ACUTE + case 0x015C => Array(0x015D) // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + case 0x015E => Array(0x015F) // LATIN CAPITAL LETTER S WITH CEDILLA + case 0x0160 => Array(0x0161) // LATIN CAPITAL LETTER S WITH CARON + case 0x0162 => Array(0x0163) // LATIN CAPITAL LETTER T WITH CEDILLA + case 0x0164 => Array(0x0165) // LATIN CAPITAL LETTER T WITH CARON + case 0x0166 => Array(0x0167) // LATIN CAPITAL LETTER T WITH STROKE + case 0x0168 => Array(0x0169) // LATIN CAPITAL LETTER U WITH TILDE + case 0x016A => Array(0x016B) // LATIN CAPITAL LETTER U WITH MACRON + case 0x016C => Array(0x016D) // LATIN CAPITAL LETTER U WITH BREVE + case 0x016E => Array(0x016F) // LATIN CAPITAL LETTER U WITH RING ABOVE + case 0x0170 => Array(0x0171) // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x0172 => Array(0x0173) // LATIN CAPITAL LETTER U WITH OGONEK + case 0x0174 => Array(0x0175) // LATIN CAPITAL LETTER W WITH CIRCUMFLEX + case 0x0176 => Array(0x0177) // 
LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + case 0x0178 => Array(0x00FF) // LATIN CAPITAL LETTER Y WITH DIAERESIS + case 0x0179 => Array(0x017A) // LATIN CAPITAL LETTER Z WITH ACUTE + case 0x017B => Array(0x017C) // LATIN CAPITAL LETTER Z WITH DOT ABOVE + case 0x017D => Array(0x017E) // LATIN CAPITAL LETTER Z WITH CARON + case 0x017F => Array(0x0073) // LATIN SMALL LETTER LONG S + case 0x0181 => Array(0x0253) // LATIN CAPITAL LETTER B WITH HOOK + case 0x0182 => Array(0x0183) // LATIN CAPITAL LETTER B WITH TOPBAR + case 0x0184 => Array(0x0185) // LATIN CAPITAL LETTER TONE SIX + case 0x0186 => Array(0x0254) // LATIN CAPITAL LETTER OPEN O + case 0x0187 => Array(0x0188) // LATIN CAPITAL LETTER C WITH HOOK + case 0x0189 => Array(0x0256) // LATIN CAPITAL LETTER AFRICAN D + case 0x018A => Array(0x0257) // LATIN CAPITAL LETTER D WITH HOOK + case 0x018B => Array(0x018C) // LATIN CAPITAL LETTER D WITH TOPBAR + case 0x018E => Array(0x01DD) // LATIN CAPITAL LETTER REVERSED E + case 0x018F => Array(0x0259) // LATIN CAPITAL LETTER SCHWA + case 0x0190 => Array(0x025B) // LATIN CAPITAL LETTER OPEN E + case 0x0191 => Array(0x0192) // LATIN CAPITAL LETTER F WITH HOOK + case 0x0193 => Array(0x0260) // LATIN CAPITAL LETTER G WITH HOOK + case 0x0194 => Array(0x0263) // LATIN CAPITAL LETTER GAMMA + case 0x0196 => Array(0x0269) // LATIN CAPITAL LETTER IOTA + case 0x0197 => Array(0x0268) // LATIN CAPITAL LETTER I WITH STROKE + case 0x0198 => Array(0x0199) // LATIN CAPITAL LETTER K WITH HOOK + case 0x019C => Array(0x026F) // LATIN CAPITAL LETTER TURNED M + case 0x019D => Array(0x0272) // LATIN CAPITAL LETTER N WITH LEFT HOOK + case 0x019F => Array(0x0275) // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + case 0x01A0 => Array(0x01A1) // LATIN CAPITAL LETTER O WITH HORN + case 0x01A2 => Array(0x01A3) // LATIN CAPITAL LETTER OI + case 0x01A4 => Array(0x01A5) // LATIN CAPITAL LETTER P WITH HOOK + case 0x01A6 => Array(0x0280) // LATIN LETTER YR + case 0x01A7 => Array(0x01A8) // LATIN CAPITAL LETTER 
TONE TWO + case 0x01A9 => Array(0x0283) // LATIN CAPITAL LETTER ESH + case 0x01AC => Array(0x01AD) // LATIN CAPITAL LETTER T WITH HOOK + case 0x01AE => Array(0x0288) // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + case 0x01AF => Array(0x01B0) // LATIN CAPITAL LETTER U WITH HORN + case 0x01B1 => Array(0x028A) // LATIN CAPITAL LETTER UPSILON + case 0x01B2 => Array(0x028B) // LATIN CAPITAL LETTER V WITH HOOK + case 0x01B3 => Array(0x01B4) // LATIN CAPITAL LETTER Y WITH HOOK + case 0x01B5 => Array(0x01B6) // LATIN CAPITAL LETTER Z WITH STROKE + case 0x01B7 => Array(0x0292) // LATIN CAPITAL LETTER EZH + case 0x01B8 => Array(0x01B9) // LATIN CAPITAL LETTER EZH REVERSED + case 0x01BC => Array(0x01BD) // LATIN CAPITAL LETTER TONE FIVE + case 0x01C4 => Array(0x01C6) // LATIN CAPITAL LETTER DZ WITH CARON + case 0x01C5 => Array(0x01C6) // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + case 0x01C7 => Array(0x01C9) // LATIN CAPITAL LETTER LJ + case 0x01C8 => Array(0x01C9) // LATIN CAPITAL LETTER L WITH SMALL LETTER J + case 0x01CA => Array(0x01CC) // LATIN CAPITAL LETTER NJ + case 0x01CB => Array(0x01CC) // LATIN CAPITAL LETTER N WITH SMALL LETTER J + case 0x01CD => Array(0x01CE) // LATIN CAPITAL LETTER A WITH CARON + case 0x01CF => Array(0x01D0) // LATIN CAPITAL LETTER I WITH CARON + case 0x01D1 => Array(0x01D2) // LATIN CAPITAL LETTER O WITH CARON + case 0x01D3 => Array(0x01D4) // LATIN CAPITAL LETTER U WITH CARON + case 0x01D5 => Array(0x01D6) // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + case 0x01D7 => Array(0x01D8) // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + case 0x01D9 => Array(0x01DA) // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + case 0x01DB => Array(0x01DC) // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + case 0x01DE => Array(0x01DF) // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + case 0x01E0 => Array(0x01E1) // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + case 0x01E2 => Array(0x01E3) // LATIN CAPITAL LETTER AE WITH 
MACRON + case 0x01E4 => Array(0x01E5) // LATIN CAPITAL LETTER G WITH STROKE + case 0x01E6 => Array(0x01E7) // LATIN CAPITAL LETTER G WITH CARON + case 0x01E8 => Array(0x01E9) // LATIN CAPITAL LETTER K WITH CARON + case 0x01EA => Array(0x01EB) // LATIN CAPITAL LETTER O WITH OGONEK + case 0x01EC => Array(0x01ED) // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + case 0x01EE => Array(0x01EF) // LATIN CAPITAL LETTER EZH WITH CARON + case 0x01F1 => Array(0x01F3) // LATIN CAPITAL LETTER DZ + case 0x01F2 => Array(0x01F3) // LATIN CAPITAL LETTER D WITH SMALL LETTER Z + case 0x01F4 => Array(0x01F5) // LATIN CAPITAL LETTER G WITH ACUTE + case 0x01F6 => Array(0x0195) // LATIN CAPITAL LETTER HWAIR + case 0x01F7 => Array(0x01BF) // LATIN CAPITAL LETTER WYNN + case 0x01F8 => Array(0x01F9) // LATIN CAPITAL LETTER N WITH GRAVE + case 0x01FA => Array(0x01FB) // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + case 0x01FC => Array(0x01FD) // LATIN CAPITAL LETTER AE WITH ACUTE + case 0x01FE => Array(0x01FF) // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + case 0x0200 => Array(0x0201) // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + case 0x0202 => Array(0x0203) // LATIN CAPITAL LETTER A WITH INVERTED BREVE + case 0x0204 => Array(0x0205) // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + case 0x0206 => Array(0x0207) // LATIN CAPITAL LETTER E WITH INVERTED BREVE + case 0x0208 => Array(0x0209) // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + case 0x020A => Array(0x020B) // LATIN CAPITAL LETTER I WITH INVERTED BREVE + case 0x020C => Array(0x020D) // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + case 0x020E => Array(0x020F) // LATIN CAPITAL LETTER O WITH INVERTED BREVE + case 0x0210 => Array(0x0211) // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + case 0x0212 => Array(0x0213) // LATIN CAPITAL LETTER R WITH INVERTED BREVE + case 0x0214 => Array(0x0215) // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + case 0x0216 => Array(0x0217) // LATIN CAPITAL LETTER U WITH INVERTED BREVE + case 0x0218 => Array(0x0219) // 
LATIN CAPITAL LETTER S WITH COMMA BELOW + case 0x021A => Array(0x021B) // LATIN CAPITAL LETTER T WITH COMMA BELOW + case 0x021C => Array(0x021D) // LATIN CAPITAL LETTER YOGH + case 0x021E => Array(0x021F) // LATIN CAPITAL LETTER H WITH CARON + case 0x0220 => Array(0x019E) // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + case 0x0222 => Array(0x0223) // LATIN CAPITAL LETTER OU + case 0x0224 => Array(0x0225) // LATIN CAPITAL LETTER Z WITH HOOK + case 0x0226 => Array(0x0227) // LATIN CAPITAL LETTER A WITH DOT ABOVE + case 0x0228 => Array(0x0229) // LATIN CAPITAL LETTER E WITH CEDILLA + case 0x022A => Array(0x022B) // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + case 0x022C => Array(0x022D) // LATIN CAPITAL LETTER O WITH TILDE AND MACRON + case 0x022E => Array(0x022F) // LATIN CAPITAL LETTER O WITH DOT ABOVE + case 0x0230 => Array(0x0231) // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + case 0x0232 => Array(0x0233) // LATIN CAPITAL LETTER Y WITH MACRON + case 0x023A => Array(0x2C65) // LATIN CAPITAL LETTER A WITH STROKE + case 0x023B => Array(0x023C) // LATIN CAPITAL LETTER C WITH STROKE + case 0x023D => Array(0x019A) // LATIN CAPITAL LETTER L WITH BAR + case 0x023E => Array(0x2C66) // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + case 0x0241 => Array(0x0242) // LATIN CAPITAL LETTER GLOTTAL STOP + case 0x0243 => Array(0x0180) // LATIN CAPITAL LETTER B WITH STROKE + case 0x0244 => Array(0x0289) // LATIN CAPITAL LETTER U BAR + case 0x0245 => Array(0x028C) // LATIN CAPITAL LETTER TURNED V + case 0x0246 => Array(0x0247) // LATIN CAPITAL LETTER E WITH STROKE + case 0x0248 => Array(0x0249) // LATIN CAPITAL LETTER J WITH STROKE + case 0x024A => Array(0x024B) // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + case 0x024C => Array(0x024D) // LATIN CAPITAL LETTER R WITH STROKE + case 0x024E => Array(0x024F) // LATIN CAPITAL LETTER Y WITH STROKE + case 0x0345 => Array(0x03B9) // COMBINING GREEK YPOGEGRAMMENI + case 0x0370 => Array(0x0371) // GREEK CAPITAL LETTER HETA + 
case 0x0372 => Array(0x0373) // GREEK CAPITAL LETTER ARCHAIC SAMPI + case 0x0376 => Array(0x0377) // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + case 0x037F => Array(0x03F3) // GREEK CAPITAL LETTER YOT + case 0x0386 => Array(0x03AC) // GREEK CAPITAL LETTER ALPHA WITH TONOS + case 0x0388 => Array(0x03AD) // GREEK CAPITAL LETTER EPSILON WITH TONOS + case 0x0389 => Array(0x03AE) // GREEK CAPITAL LETTER ETA WITH TONOS + case 0x038A => Array(0x03AF) // GREEK CAPITAL LETTER IOTA WITH TONOS + case 0x038C => Array(0x03CC) // GREEK CAPITAL LETTER OMICRON WITH TONOS + case 0x038E => Array(0x03CD) // GREEK CAPITAL LETTER UPSILON WITH TONOS + case 0x038F => Array(0x03CE) // GREEK CAPITAL LETTER OMEGA WITH TONOS + case 0x0391 => Array(0x03B1) // GREEK CAPITAL LETTER ALPHA + case 0x0392 => Array(0x03B2) // GREEK CAPITAL LETTER BETA + case 0x0393 => Array(0x03B3) // GREEK CAPITAL LETTER GAMMA + case 0x0394 => Array(0x03B4) // GREEK CAPITAL LETTER DELTA + case 0x0395 => Array(0x03B5) // GREEK CAPITAL LETTER EPSILON + case 0x0396 => Array(0x03B6) // GREEK CAPITAL LETTER ZETA + case 0x0397 => Array(0x03B7) // GREEK CAPITAL LETTER ETA + case 0x0398 => Array(0x03B8) // GREEK CAPITAL LETTER THETA + case 0x0399 => Array(0x03B9) // GREEK CAPITAL LETTER IOTA + case 0x039A => Array(0x03BA) // GREEK CAPITAL LETTER KAPPA + case 0x039B => Array(0x03BB) // GREEK CAPITAL LETTER LAMDA + case 0x039C => Array(0x03BC) // GREEK CAPITAL LETTER MU + case 0x039D => Array(0x03BD) // GREEK CAPITAL LETTER NU + case 0x039E => Array(0x03BE) // GREEK CAPITAL LETTER XI + case 0x039F => Array(0x03BF) // GREEK CAPITAL LETTER OMICRON + case 0x03A0 => Array(0x03C0) // GREEK CAPITAL LETTER PI + case 0x03A1 => Array(0x03C1) // GREEK CAPITAL LETTER RHO + case 0x03A3 => Array(0x03C3) // GREEK CAPITAL LETTER SIGMA + case 0x03A4 => Array(0x03C4) // GREEK CAPITAL LETTER TAU + case 0x03A5 => Array(0x03C5) // GREEK CAPITAL LETTER UPSILON + case 0x03A6 => Array(0x03C6) // GREEK CAPITAL LETTER PHI + case 0x03A7 => 
Array(0x03C7) // GREEK CAPITAL LETTER CHI + case 0x03A8 => Array(0x03C8) // GREEK CAPITAL LETTER PSI + case 0x03A9 => Array(0x03C9) // GREEK CAPITAL LETTER OMEGA + case 0x03AA => Array(0x03CA) // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + case 0x03AB => Array(0x03CB) // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + case 0x03C2 => Array(0x03C3) // GREEK SMALL LETTER FINAL SIGMA + case 0x03CF => Array(0x03D7) // GREEK CAPITAL KAI SYMBOL + case 0x03D0 => Array(0x03B2) // GREEK BETA SYMBOL + case 0x03D1 => Array(0x03B8) // GREEK THETA SYMBOL + case 0x03D5 => Array(0x03C6) // GREEK PHI SYMBOL + case 0x03D6 => Array(0x03C0) // GREEK PI SYMBOL + case 0x03D8 => Array(0x03D9) // GREEK LETTER ARCHAIC KOPPA + case 0x03DA => Array(0x03DB) // GREEK LETTER STIGMA + case 0x03DC => Array(0x03DD) // GREEK LETTER DIGAMMA + case 0x03DE => Array(0x03DF) // GREEK LETTER KOPPA + case 0x03E0 => Array(0x03E1) // GREEK LETTER SAMPI + case 0x03E2 => Array(0x03E3) // COPTIC CAPITAL LETTER SHEI + case 0x03E4 => Array(0x03E5) // COPTIC CAPITAL LETTER FEI + case 0x03E6 => Array(0x03E7) // COPTIC CAPITAL LETTER KHEI + case 0x03E8 => Array(0x03E9) // COPTIC CAPITAL LETTER HORI + case 0x03EA => Array(0x03EB) // COPTIC CAPITAL LETTER GANGIA + case 0x03EC => Array(0x03ED) // COPTIC CAPITAL LETTER SHIMA + case 0x03EE => Array(0x03EF) // COPTIC CAPITAL LETTER DEI + case 0x03F0 => Array(0x03BA) // GREEK KAPPA SYMBOL + case 0x03F1 => Array(0x03C1) // GREEK RHO SYMBOL + case 0x03F4 => Array(0x03B8) // GREEK CAPITAL THETA SYMBOL + case 0x03F5 => Array(0x03B5) // GREEK LUNATE EPSILON SYMBOL + case 0x03F7 => Array(0x03F8) // GREEK CAPITAL LETTER SHO + case 0x03F9 => Array(0x03F2) // GREEK CAPITAL LUNATE SIGMA SYMBOL + case 0x03FA => Array(0x03FB) // GREEK CAPITAL LETTER SAN + case 0x03FD => Array(0x037B) // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + case 0x03FE => Array(0x037C) // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + case 0x03FF => Array(0x037D) // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + 
case 0x0400 => Array(0x0450) // CYRILLIC CAPITAL LETTER IE WITH GRAVE + case 0x0401 => Array(0x0451) // CYRILLIC CAPITAL LETTER IO + case 0x0402 => Array(0x0452) // CYRILLIC CAPITAL LETTER DJE + case 0x0403 => Array(0x0453) // CYRILLIC CAPITAL LETTER GJE + case 0x0404 => Array(0x0454) // CYRILLIC CAPITAL LETTER UKRAINIAN IE + case 0x0405 => Array(0x0455) // CYRILLIC CAPITAL LETTER DZE + case 0x0406 => Array(0x0456) // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + case 0x0407 => Array(0x0457) // CYRILLIC CAPITAL LETTER YI + case 0x0408 => Array(0x0458) // CYRILLIC CAPITAL LETTER JE + case 0x0409 => Array(0x0459) // CYRILLIC CAPITAL LETTER LJE + case 0x040A => Array(0x045A) // CYRILLIC CAPITAL LETTER NJE + case 0x040B => Array(0x045B) // CYRILLIC CAPITAL LETTER TSHE + case 0x040C => Array(0x045C) // CYRILLIC CAPITAL LETTER KJE + case 0x040D => Array(0x045D) // CYRILLIC CAPITAL LETTER I WITH GRAVE + case 0x040E => Array(0x045E) // CYRILLIC CAPITAL LETTER SHORT U + case 0x040F => Array(0x045F) // CYRILLIC CAPITAL LETTER DZHE + case 0x0410 => Array(0x0430) // CYRILLIC CAPITAL LETTER A + case 0x0411 => Array(0x0431) // CYRILLIC CAPITAL LETTER BE + case 0x0412 => Array(0x0432) // CYRILLIC CAPITAL LETTER VE + case 0x0413 => Array(0x0433) // CYRILLIC CAPITAL LETTER GHE + case 0x0414 => Array(0x0434) // CYRILLIC CAPITAL LETTER DE + case 0x0415 => Array(0x0435) // CYRILLIC CAPITAL LETTER IE + case 0x0416 => Array(0x0436) // CYRILLIC CAPITAL LETTER ZHE + case 0x0417 => Array(0x0437) // CYRILLIC CAPITAL LETTER ZE + case 0x0418 => Array(0x0438) // CYRILLIC CAPITAL LETTER I + case 0x0419 => Array(0x0439) // CYRILLIC CAPITAL LETTER SHORT I + case 0x041A => Array(0x043A) // CYRILLIC CAPITAL LETTER KA + case 0x041B => Array(0x043B) // CYRILLIC CAPITAL LETTER EL + case 0x041C => Array(0x043C) // CYRILLIC CAPITAL LETTER EM + case 0x041D => Array(0x043D) // CYRILLIC CAPITAL LETTER EN + case 0x041E => Array(0x043E) // CYRILLIC CAPITAL LETTER O + case 0x041F => Array(0x043F) // 
CYRILLIC CAPITAL LETTER PE + case 0x0420 => Array(0x0440) // CYRILLIC CAPITAL LETTER ER + case 0x0421 => Array(0x0441) // CYRILLIC CAPITAL LETTER ES + case 0x0422 => Array(0x0442) // CYRILLIC CAPITAL LETTER TE + case 0x0423 => Array(0x0443) // CYRILLIC CAPITAL LETTER U + case 0x0424 => Array(0x0444) // CYRILLIC CAPITAL LETTER EF + case 0x0425 => Array(0x0445) // CYRILLIC CAPITAL LETTER HA + case 0x0426 => Array(0x0446) // CYRILLIC CAPITAL LETTER TSE + case 0x0427 => Array(0x0447) // CYRILLIC CAPITAL LETTER CHE + case 0x0428 => Array(0x0448) // CYRILLIC CAPITAL LETTER SHA + case 0x0429 => Array(0x0449) // CYRILLIC CAPITAL LETTER SHCHA + case 0x042A => Array(0x044A) // CYRILLIC CAPITAL LETTER HARD SIGN + case 0x042B => Array(0x044B) // CYRILLIC CAPITAL LETTER YERU + case 0x042C => Array(0x044C) // CYRILLIC CAPITAL LETTER SOFT SIGN + case 0x042D => Array(0x044D) // CYRILLIC CAPITAL LETTER E + case 0x042E => Array(0x044E) // CYRILLIC CAPITAL LETTER YU + case 0x042F => Array(0x044F) // CYRILLIC CAPITAL LETTER YA + case 0x0460 => Array(0x0461) // CYRILLIC CAPITAL LETTER OMEGA + case 0x0462 => Array(0x0463) // CYRILLIC CAPITAL LETTER YAT + case 0x0464 => Array(0x0465) // CYRILLIC CAPITAL LETTER IOTIFIED E + case 0x0466 => Array(0x0467) // CYRILLIC CAPITAL LETTER LITTLE YUS + case 0x0468 => Array(0x0469) // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS + case 0x046A => Array(0x046B) // CYRILLIC CAPITAL LETTER BIG YUS + case 0x046C => Array(0x046D) // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + case 0x046E => Array(0x046F) // CYRILLIC CAPITAL LETTER KSI + case 0x0470 => Array(0x0471) // CYRILLIC CAPITAL LETTER PSI + case 0x0472 => Array(0x0473) // CYRILLIC CAPITAL LETTER FITA + case 0x0474 => Array(0x0475) // CYRILLIC CAPITAL LETTER IZHITSA + case 0x0476 => Array(0x0477) // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT + case 0x0478 => Array(0x0479) // CYRILLIC CAPITAL LETTER UK + case 0x047A => Array(0x047B) // CYRILLIC CAPITAL LETTER ROUND OMEGA + case 0x047C => 
Array(0x047D) // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO + case 0x047E => Array(0x047F) // CYRILLIC CAPITAL LETTER OT + case 0x0480 => Array(0x0481) // CYRILLIC CAPITAL LETTER KOPPA + case 0x048A => Array(0x048B) // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + case 0x048C => Array(0x048D) // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + case 0x048E => Array(0x048F) // CYRILLIC CAPITAL LETTER ER WITH TICK + case 0x0490 => Array(0x0491) // CYRILLIC CAPITAL LETTER GHE WITH UPTURN + case 0x0492 => Array(0x0493) // CYRILLIC CAPITAL LETTER GHE WITH STROKE + case 0x0494 => Array(0x0495) // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK + case 0x0496 => Array(0x0497) // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + case 0x0498 => Array(0x0499) // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + case 0x049A => Array(0x049B) // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + case 0x049C => Array(0x049D) // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + case 0x049E => Array(0x049F) // CYRILLIC CAPITAL LETTER KA WITH STROKE + case 0x04A0 => Array(0x04A1) // CYRILLIC CAPITAL LETTER BASHKIR KA + case 0x04A2 => Array(0x04A3) // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + case 0x04A4 => Array(0x04A5) // CYRILLIC CAPITAL LIGATURE EN GHE + case 0x04A6 => Array(0x04A7) // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + case 0x04A8 => Array(0x04A9) // CYRILLIC CAPITAL LETTER ABKHASIAN HA + case 0x04AA => Array(0x04AB) // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + case 0x04AC => Array(0x04AD) // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + case 0x04AE => Array(0x04AF) // CYRILLIC CAPITAL LETTER STRAIGHT U + case 0x04B0 => Array(0x04B1) // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + case 0x04B2 => Array(0x04B3) // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + case 0x04B4 => Array(0x04B5) // CYRILLIC CAPITAL LIGATURE TE TSE + case 0x04B6 => Array(0x04B7) // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + case 0x04B8 => Array(0x04B9) // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + case 0x04BA => 
Array(0x04BB) // CYRILLIC CAPITAL LETTER SHHA + case 0x04BC => Array(0x04BD) // CYRILLIC CAPITAL LETTER ABKHASIAN CHE + case 0x04BE => Array(0x04BF) // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + case 0x04C0 => Array(0x04CF) // CYRILLIC LETTER PALOCHKA + case 0x04C1 => Array(0x04C2) // CYRILLIC CAPITAL LETTER ZHE WITH BREVE + case 0x04C3 => Array(0x04C4) // CYRILLIC CAPITAL LETTER KA WITH HOOK + case 0x04C5 => Array(0x04C6) // CYRILLIC CAPITAL LETTER EL WITH TAIL + case 0x04C7 => Array(0x04C8) // CYRILLIC CAPITAL LETTER EN WITH HOOK + case 0x04C9 => Array(0x04CA) // CYRILLIC CAPITAL LETTER EN WITH TAIL + case 0x04CB => Array(0x04CC) // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + case 0x04CD => Array(0x04CE) // CYRILLIC CAPITAL LETTER EM WITH TAIL + case 0x04D0 => Array(0x04D1) // CYRILLIC CAPITAL LETTER A WITH BREVE + case 0x04D2 => Array(0x04D3) // CYRILLIC CAPITAL LETTER A WITH DIAERESIS + case 0x04D4 => Array(0x04D5) // CYRILLIC CAPITAL LIGATURE A IE + case 0x04D6 => Array(0x04D7) // CYRILLIC CAPITAL LETTER IE WITH BREVE + case 0x04D8 => Array(0x04D9) // CYRILLIC CAPITAL LETTER SCHWA + case 0x04DA => Array(0x04DB) // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS + case 0x04DC => Array(0x04DD) // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS + case 0x04DE => Array(0x04DF) // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS + case 0x04E0 => Array(0x04E1) // CYRILLIC CAPITAL LETTER ABKHASIAN DZE + case 0x04E2 => Array(0x04E3) // CYRILLIC CAPITAL LETTER I WITH MACRON + case 0x04E4 => Array(0x04E5) // CYRILLIC CAPITAL LETTER I WITH DIAERESIS + case 0x04E6 => Array(0x04E7) // CYRILLIC CAPITAL LETTER O WITH DIAERESIS + case 0x04E8 => Array(0x04E9) // CYRILLIC CAPITAL LETTER BARRED O + case 0x04EA => Array(0x04EB) // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS + case 0x04EC => Array(0x04ED) // CYRILLIC CAPITAL LETTER E WITH DIAERESIS + case 0x04EE => Array(0x04EF) // CYRILLIC CAPITAL LETTER U WITH MACRON + case 0x04F0 => Array(0x04F1) // CYRILLIC CAPITAL LETTER U WITH 
DIAERESIS + case 0x04F2 => Array(0x04F3) // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x04F4 => Array(0x04F5) // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS + case 0x04F6 => Array(0x04F7) // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + case 0x04F8 => Array(0x04F9) // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS + case 0x04FA => Array(0x04FB) // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + case 0x04FC => Array(0x04FD) // CYRILLIC CAPITAL LETTER HA WITH HOOK + case 0x04FE => Array(0x04FF) // CYRILLIC CAPITAL LETTER HA WITH STROKE + case 0x0500 => Array(0x0501) // CYRILLIC CAPITAL LETTER KOMI DE + case 0x0502 => Array(0x0503) // CYRILLIC CAPITAL LETTER KOMI DJE + case 0x0504 => Array(0x0505) // CYRILLIC CAPITAL LETTER KOMI ZJE + case 0x0506 => Array(0x0507) // CYRILLIC CAPITAL LETTER KOMI DZJE + case 0x0508 => Array(0x0509) // CYRILLIC CAPITAL LETTER KOMI LJE + case 0x050A => Array(0x050B) // CYRILLIC CAPITAL LETTER KOMI NJE + case 0x050C => Array(0x050D) // CYRILLIC CAPITAL LETTER KOMI SJE + case 0x050E => Array(0x050F) // CYRILLIC CAPITAL LETTER KOMI TJE + case 0x0510 => Array(0x0511) // CYRILLIC CAPITAL LETTER REVERSED ZE + case 0x0512 => Array(0x0513) // CYRILLIC CAPITAL LETTER EL WITH HOOK + case 0x0514 => Array(0x0515) // CYRILLIC CAPITAL LETTER LHA + case 0x0516 => Array(0x0517) // CYRILLIC CAPITAL LETTER RHA + case 0x0518 => Array(0x0519) // CYRILLIC CAPITAL LETTER YAE + case 0x051A => Array(0x051B) // CYRILLIC CAPITAL LETTER QA + case 0x051C => Array(0x051D) // CYRILLIC CAPITAL LETTER WE + case 0x051E => Array(0x051F) // CYRILLIC CAPITAL LETTER ALEUT KA + case 0x0520 => Array(0x0521) // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK + case 0x0522 => Array(0x0523) // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK + case 0x0524 => Array(0x0525) // CYRILLIC CAPITAL LETTER PE WITH DESCENDER + case 0x0526 => Array(0x0527) // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER + case 0x0528 => Array(0x0529) // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK + case 0x052A 
=> Array(0x052B) // CYRILLIC CAPITAL LETTER DZZHE + case 0x052C => Array(0x052D) // CYRILLIC CAPITAL LETTER DCHE + case 0x052E => Array(0x052F) // CYRILLIC CAPITAL LETTER EL WITH DESCENDER + case 0x0531 => Array(0x0561) // ARMENIAN CAPITAL LETTER AYB + case 0x0532 => Array(0x0562) // ARMENIAN CAPITAL LETTER BEN + case 0x0533 => Array(0x0563) // ARMENIAN CAPITAL LETTER GIM + case 0x0534 => Array(0x0564) // ARMENIAN CAPITAL LETTER DA + case 0x0535 => Array(0x0565) // ARMENIAN CAPITAL LETTER ECH + case 0x0536 => Array(0x0566) // ARMENIAN CAPITAL LETTER ZA + case 0x0537 => Array(0x0567) // ARMENIAN CAPITAL LETTER EH + case 0x0538 => Array(0x0568) // ARMENIAN CAPITAL LETTER ET + case 0x0539 => Array(0x0569) // ARMENIAN CAPITAL LETTER TO + case 0x053A => Array(0x056A) // ARMENIAN CAPITAL LETTER ZHE + case 0x053B => Array(0x056B) // ARMENIAN CAPITAL LETTER INI + case 0x053C => Array(0x056C) // ARMENIAN CAPITAL LETTER LIWN + case 0x053D => Array(0x056D) // ARMENIAN CAPITAL LETTER XEH + case 0x053E => Array(0x056E) // ARMENIAN CAPITAL LETTER CA + case 0x053F => Array(0x056F) // ARMENIAN CAPITAL LETTER KEN + case 0x0540 => Array(0x0570) // ARMENIAN CAPITAL LETTER HO + case 0x0541 => Array(0x0571) // ARMENIAN CAPITAL LETTER JA + case 0x0542 => Array(0x0572) // ARMENIAN CAPITAL LETTER GHAD + case 0x0543 => Array(0x0573) // ARMENIAN CAPITAL LETTER CHEH + case 0x0544 => Array(0x0574) // ARMENIAN CAPITAL LETTER MEN + case 0x0545 => Array(0x0575) // ARMENIAN CAPITAL LETTER YI + case 0x0546 => Array(0x0576) // ARMENIAN CAPITAL LETTER NOW + case 0x0547 => Array(0x0577) // ARMENIAN CAPITAL LETTER SHA + case 0x0548 => Array(0x0578) // ARMENIAN CAPITAL LETTER VO + case 0x0549 => Array(0x0579) // ARMENIAN CAPITAL LETTER CHA + case 0x054A => Array(0x057A) // ARMENIAN CAPITAL LETTER PEH + case 0x054B => Array(0x057B) // ARMENIAN CAPITAL LETTER JHEH + case 0x054C => Array(0x057C) // ARMENIAN CAPITAL LETTER RA + case 0x054D => Array(0x057D) // ARMENIAN CAPITAL LETTER SEH + case 0x054E => 
Array(0x057E) // ARMENIAN CAPITAL LETTER VEW + case 0x054F => Array(0x057F) // ARMENIAN CAPITAL LETTER TIWN + case 0x0550 => Array(0x0580) // ARMENIAN CAPITAL LETTER REH + case 0x0551 => Array(0x0581) // ARMENIAN CAPITAL LETTER CO + case 0x0552 => Array(0x0582) // ARMENIAN CAPITAL LETTER YIWN + case 0x0553 => Array(0x0583) // ARMENIAN CAPITAL LETTER PIWR + case 0x0554 => Array(0x0584) // ARMENIAN CAPITAL LETTER KEH + case 0x0555 => Array(0x0585) // ARMENIAN CAPITAL LETTER OH + case 0x0556 => Array(0x0586) // ARMENIAN CAPITAL LETTER FEH + case 0x10A0 => Array(0x2D00) // GEORGIAN CAPITAL LETTER AN + case 0x10A1 => Array(0x2D01) // GEORGIAN CAPITAL LETTER BAN + case 0x10A2 => Array(0x2D02) // GEORGIAN CAPITAL LETTER GAN + case 0x10A3 => Array(0x2D03) // GEORGIAN CAPITAL LETTER DON + case 0x10A4 => Array(0x2D04) // GEORGIAN CAPITAL LETTER EN + case 0x10A5 => Array(0x2D05) // GEORGIAN CAPITAL LETTER VIN + case 0x10A6 => Array(0x2D06) // GEORGIAN CAPITAL LETTER ZEN + case 0x10A7 => Array(0x2D07) // GEORGIAN CAPITAL LETTER TAN + case 0x10A8 => Array(0x2D08) // GEORGIAN CAPITAL LETTER IN + case 0x10A9 => Array(0x2D09) // GEORGIAN CAPITAL LETTER KAN + case 0x10AA => Array(0x2D0A) // GEORGIAN CAPITAL LETTER LAS + case 0x10AB => Array(0x2D0B) // GEORGIAN CAPITAL LETTER MAN + case 0x10AC => Array(0x2D0C) // GEORGIAN CAPITAL LETTER NAR + case 0x10AD => Array(0x2D0D) // GEORGIAN CAPITAL LETTER ON + case 0x10AE => Array(0x2D0E) // GEORGIAN CAPITAL LETTER PAR + case 0x10AF => Array(0x2D0F) // GEORGIAN CAPITAL LETTER ZHAR + case 0x10B0 => Array(0x2D10) // GEORGIAN CAPITAL LETTER RAE + case 0x10B1 => Array(0x2D11) // GEORGIAN CAPITAL LETTER SAN + case 0x10B2 => Array(0x2D12) // GEORGIAN CAPITAL LETTER TAR + case 0x10B3 => Array(0x2D13) // GEORGIAN CAPITAL LETTER UN + case 0x10B4 => Array(0x2D14) // GEORGIAN CAPITAL LETTER PHAR + case 0x10B5 => Array(0x2D15) // GEORGIAN CAPITAL LETTER KHAR + case 0x10B6 => Array(0x2D16) // GEORGIAN CAPITAL LETTER GHAN + case 0x10B7 => Array(0x2D17) 
// GEORGIAN CAPITAL LETTER QAR + case 0x10B8 => Array(0x2D18) // GEORGIAN CAPITAL LETTER SHIN + case 0x10B9 => Array(0x2D19) // GEORGIAN CAPITAL LETTER CHIN + case 0x10BA => Array(0x2D1A) // GEORGIAN CAPITAL LETTER CAN + case 0x10BB => Array(0x2D1B) // GEORGIAN CAPITAL LETTER JIL + case 0x10BC => Array(0x2D1C) // GEORGIAN CAPITAL LETTER CIL + case 0x10BD => Array(0x2D1D) // GEORGIAN CAPITAL LETTER CHAR + case 0x10BE => Array(0x2D1E) // GEORGIAN CAPITAL LETTER XAN + case 0x10BF => Array(0x2D1F) // GEORGIAN CAPITAL LETTER JHAN + case 0x10C0 => Array(0x2D20) // GEORGIAN CAPITAL LETTER HAE + case 0x10C1 => Array(0x2D21) // GEORGIAN CAPITAL LETTER HE + case 0x10C2 => Array(0x2D22) // GEORGIAN CAPITAL LETTER HIE + case 0x10C3 => Array(0x2D23) // GEORGIAN CAPITAL LETTER WE + case 0x10C4 => Array(0x2D24) // GEORGIAN CAPITAL LETTER HAR + case 0x10C5 => Array(0x2D25) // GEORGIAN CAPITAL LETTER HOE + case 0x10C7 => Array(0x2D27) // GEORGIAN CAPITAL LETTER YN + case 0x10CD => Array(0x2D2D) // GEORGIAN CAPITAL LETTER AEN + case 0x13F8 => Array(0x13F0) // CHEROKEE SMALL LETTER YE + case 0x13F9 => Array(0x13F1) // CHEROKEE SMALL LETTER YI + case 0x13FA => Array(0x13F2) // CHEROKEE SMALL LETTER YO + case 0x13FB => Array(0x13F3) // CHEROKEE SMALL LETTER YU + case 0x13FC => Array(0x13F4) // CHEROKEE SMALL LETTER YV + case 0x13FD => Array(0x13F5) // CHEROKEE SMALL LETTER MV + case 0x1C80 => Array(0x0432) // CYRILLIC SMALL LETTER ROUNDED VE + case 0x1C81 => Array(0x0434) // CYRILLIC SMALL LETTER LONG-LEGGED DE + case 0x1C82 => Array(0x043E) // CYRILLIC SMALL LETTER NARROW O + case 0x1C83 => Array(0x0441) // CYRILLIC SMALL LETTER WIDE ES + case 0x1C84 => Array(0x0442) // CYRILLIC SMALL LETTER TALL TE + case 0x1C85 => Array(0x0442) // CYRILLIC SMALL LETTER THREE-LEGGED TE + case 0x1C86 => Array(0x044A) // CYRILLIC SMALL LETTER TALL HARD SIGN + case 0x1C87 => Array(0x0463) // CYRILLIC SMALL LETTER TALL YAT + case 0x1C88 => Array(0xA64B) // CYRILLIC SMALL LETTER UNBLENDED UK + case 0x1C90 
=> Array(0x10D0) // GEORGIAN MTAVRULI CAPITAL LETTER AN + case 0x1C91 => Array(0x10D1) // GEORGIAN MTAVRULI CAPITAL LETTER BAN + case 0x1C92 => Array(0x10D2) // GEORGIAN MTAVRULI CAPITAL LETTER GAN + case 0x1C93 => Array(0x10D3) // GEORGIAN MTAVRULI CAPITAL LETTER DON + case 0x1C94 => Array(0x10D4) // GEORGIAN MTAVRULI CAPITAL LETTER EN + case 0x1C95 => Array(0x10D5) // GEORGIAN MTAVRULI CAPITAL LETTER VIN + case 0x1C96 => Array(0x10D6) // GEORGIAN MTAVRULI CAPITAL LETTER ZEN + case 0x1C97 => Array(0x10D7) // GEORGIAN MTAVRULI CAPITAL LETTER TAN + case 0x1C98 => Array(0x10D8) // GEORGIAN MTAVRULI CAPITAL LETTER IN + case 0x1C99 => Array(0x10D9) // GEORGIAN MTAVRULI CAPITAL LETTER KAN + case 0x1C9A => Array(0x10DA) // GEORGIAN MTAVRULI CAPITAL LETTER LAS + case 0x1C9B => Array(0x10DB) // GEORGIAN MTAVRULI CAPITAL LETTER MAN + case 0x1C9C => Array(0x10DC) // GEORGIAN MTAVRULI CAPITAL LETTER NAR + case 0x1C9D => Array(0x10DD) // GEORGIAN MTAVRULI CAPITAL LETTER ON + case 0x1C9E => Array(0x10DE) // GEORGIAN MTAVRULI CAPITAL LETTER PAR + case 0x1C9F => Array(0x10DF) // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR + case 0x1CA0 => Array(0x10E0) // GEORGIAN MTAVRULI CAPITAL LETTER RAE + case 0x1CA1 => Array(0x10E1) // GEORGIAN MTAVRULI CAPITAL LETTER SAN + case 0x1CA2 => Array(0x10E2) // GEORGIAN MTAVRULI CAPITAL LETTER TAR + case 0x1CA3 => Array(0x10E3) // GEORGIAN MTAVRULI CAPITAL LETTER UN + case 0x1CA4 => Array(0x10E4) // GEORGIAN MTAVRULI CAPITAL LETTER PHAR + case 0x1CA5 => Array(0x10E5) // GEORGIAN MTAVRULI CAPITAL LETTER KHAR + case 0x1CA6 => Array(0x10E6) // GEORGIAN MTAVRULI CAPITAL LETTER GHAN + case 0x1CA7 => Array(0x10E7) // GEORGIAN MTAVRULI CAPITAL LETTER QAR + case 0x1CA8 => Array(0x10E8) // GEORGIAN MTAVRULI CAPITAL LETTER SHIN + case 0x1CA9 => Array(0x10E9) // GEORGIAN MTAVRULI CAPITAL LETTER CHIN + case 0x1CAA => Array(0x10EA) // GEORGIAN MTAVRULI CAPITAL LETTER CAN + case 0x1CAB => Array(0x10EB) // GEORGIAN MTAVRULI CAPITAL LETTER JIL + case 0x1CAC => 
Array(0x10EC) // GEORGIAN MTAVRULI CAPITAL LETTER CIL + case 0x1CAD => Array(0x10ED) // GEORGIAN MTAVRULI CAPITAL LETTER CHAR + case 0x1CAE => Array(0x10EE) // GEORGIAN MTAVRULI CAPITAL LETTER XAN + case 0x1CAF => Array(0x10EF) // GEORGIAN MTAVRULI CAPITAL LETTER JHAN + case 0x1CB0 => Array(0x10F0) // GEORGIAN MTAVRULI CAPITAL LETTER HAE + case 0x1CB1 => Array(0x10F1) // GEORGIAN MTAVRULI CAPITAL LETTER HE + case 0x1CB2 => Array(0x10F2) // GEORGIAN MTAVRULI CAPITAL LETTER HIE + case 0x1CB3 => Array(0x10F3) // GEORGIAN MTAVRULI CAPITAL LETTER WE + case 0x1CB4 => Array(0x10F4) // GEORGIAN MTAVRULI CAPITAL LETTER HAR + case 0x1CB5 => Array(0x10F5) // GEORGIAN MTAVRULI CAPITAL LETTER HOE + case 0x1CB6 => Array(0x10F6) // GEORGIAN MTAVRULI CAPITAL LETTER FI + case 0x1CB7 => Array(0x10F7) // GEORGIAN MTAVRULI CAPITAL LETTER YN + case 0x1CB8 => Array(0x10F8) // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI + case 0x1CB9 => Array(0x10F9) // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN + case 0x1CBA => Array(0x10FA) // GEORGIAN MTAVRULI CAPITAL LETTER AIN + case 0x1CBD => Array(0x10FD) // GEORGIAN MTAVRULI CAPITAL LETTER AEN + case 0x1CBE => Array(0x10FE) // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN + case 0x1CBF => Array(0x10FF) // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN + case 0x1E00 => Array(0x1E01) // LATIN CAPITAL LETTER A WITH RING BELOW + case 0x1E02 => Array(0x1E03) // LATIN CAPITAL LETTER B WITH DOT ABOVE + case 0x1E04 => Array(0x1E05) // LATIN CAPITAL LETTER B WITH DOT BELOW + case 0x1E06 => Array(0x1E07) // LATIN CAPITAL LETTER B WITH LINE BELOW + case 0x1E08 => Array(0x1E09) // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + case 0x1E0A => Array(0x1E0B) // LATIN CAPITAL LETTER D WITH DOT ABOVE + case 0x1E0C => Array(0x1E0D) // LATIN CAPITAL LETTER D WITH DOT BELOW + case 0x1E0E => Array(0x1E0F) // LATIN CAPITAL LETTER D WITH LINE BELOW + case 0x1E10 => Array(0x1E11) // LATIN CAPITAL LETTER D WITH CEDILLA + case 0x1E12 => Array(0x1E13) // LATIN CAPITAL LETTER D 
WITH CIRCUMFLEX BELOW + case 0x1E14 => Array(0x1E15) // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + case 0x1E16 => Array(0x1E17) // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + case 0x1E18 => Array(0x1E19) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + case 0x1E1A => Array(0x1E1B) // LATIN CAPITAL LETTER E WITH TILDE BELOW + case 0x1E1C => Array(0x1E1D) // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + case 0x1E1E => Array(0x1E1F) // LATIN CAPITAL LETTER F WITH DOT ABOVE + case 0x1E20 => Array(0x1E21) // LATIN CAPITAL LETTER G WITH MACRON + case 0x1E22 => Array(0x1E23) // LATIN CAPITAL LETTER H WITH DOT ABOVE + case 0x1E24 => Array(0x1E25) // LATIN CAPITAL LETTER H WITH DOT BELOW + case 0x1E26 => Array(0x1E27) // LATIN CAPITAL LETTER H WITH DIAERESIS + case 0x1E28 => Array(0x1E29) // LATIN CAPITAL LETTER H WITH CEDILLA + case 0x1E2A => Array(0x1E2B) // LATIN CAPITAL LETTER H WITH BREVE BELOW + case 0x1E2C => Array(0x1E2D) // LATIN CAPITAL LETTER I WITH TILDE BELOW + case 0x1E2E => Array(0x1E2F) // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + case 0x1E30 => Array(0x1E31) // LATIN CAPITAL LETTER K WITH ACUTE + case 0x1E32 => Array(0x1E33) // LATIN CAPITAL LETTER K WITH DOT BELOW + case 0x1E34 => Array(0x1E35) // LATIN CAPITAL LETTER K WITH LINE BELOW + case 0x1E36 => Array(0x1E37) // LATIN CAPITAL LETTER L WITH DOT BELOW + case 0x1E38 => Array(0x1E39) // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + case 0x1E3A => Array(0x1E3B) // LATIN CAPITAL LETTER L WITH LINE BELOW + case 0x1E3C => Array(0x1E3D) // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + case 0x1E3E => Array(0x1E3F) // LATIN CAPITAL LETTER M WITH ACUTE + case 0x1E40 => Array(0x1E41) // LATIN CAPITAL LETTER M WITH DOT ABOVE + case 0x1E42 => Array(0x1E43) // LATIN CAPITAL LETTER M WITH DOT BELOW + case 0x1E44 => Array(0x1E45) // LATIN CAPITAL LETTER N WITH DOT ABOVE + case 0x1E46 => Array(0x1E47) // LATIN CAPITAL LETTER N WITH DOT BELOW + case 0x1E48 => Array(0x1E49) // LATIN CAPITAL 
LETTER N WITH LINE BELOW + case 0x1E4A => Array(0x1E4B) // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + case 0x1E4C => Array(0x1E4D) // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + case 0x1E4E => Array(0x1E4F) // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + case 0x1E50 => Array(0x1E51) // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + case 0x1E52 => Array(0x1E53) // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + case 0x1E54 => Array(0x1E55) // LATIN CAPITAL LETTER P WITH ACUTE + case 0x1E56 => Array(0x1E57) // LATIN CAPITAL LETTER P WITH DOT ABOVE + case 0x1E58 => Array(0x1E59) // LATIN CAPITAL LETTER R WITH DOT ABOVE + case 0x1E5A => Array(0x1E5B) // LATIN CAPITAL LETTER R WITH DOT BELOW + case 0x1E5C => Array(0x1E5D) // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + case 0x1E5E => Array(0x1E5F) // LATIN CAPITAL LETTER R WITH LINE BELOW + case 0x1E60 => Array(0x1E61) // LATIN CAPITAL LETTER S WITH DOT ABOVE + case 0x1E62 => Array(0x1E63) // LATIN CAPITAL LETTER S WITH DOT BELOW + case 0x1E64 => Array(0x1E65) // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1E66 => Array(0x1E67) // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + case 0x1E68 => Array(0x1E69) // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0x1E6A => Array(0x1E6B) // LATIN CAPITAL LETTER T WITH DOT ABOVE + case 0x1E6C => Array(0x1E6D) // LATIN CAPITAL LETTER T WITH DOT BELOW + case 0x1E6E => Array(0x1E6F) // LATIN CAPITAL LETTER T WITH LINE BELOW + case 0x1E70 => Array(0x1E71) // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + case 0x1E72 => Array(0x1E73) // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + case 0x1E74 => Array(0x1E75) // LATIN CAPITAL LETTER U WITH TILDE BELOW + case 0x1E76 => Array(0x1E77) // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + case 0x1E78 => Array(0x1E79) // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + case 0x1E7A => Array(0x1E7B) // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + case 0x1E7C => Array(0x1E7D) // LATIN CAPITAL LETTER 
V WITH TILDE + case 0x1E7E => Array(0x1E7F) // LATIN CAPITAL LETTER V WITH DOT BELOW + case 0x1E80 => Array(0x1E81) // LATIN CAPITAL LETTER W WITH GRAVE + case 0x1E82 => Array(0x1E83) // LATIN CAPITAL LETTER W WITH ACUTE + case 0x1E84 => Array(0x1E85) // LATIN CAPITAL LETTER W WITH DIAERESIS + case 0x1E86 => Array(0x1E87) // LATIN CAPITAL LETTER W WITH DOT ABOVE + case 0x1E88 => Array(0x1E89) // LATIN CAPITAL LETTER W WITH DOT BELOW + case 0x1E8A => Array(0x1E8B) // LATIN CAPITAL LETTER X WITH DOT ABOVE + case 0x1E8C => Array(0x1E8D) // LATIN CAPITAL LETTER X WITH DIAERESIS + case 0x1E8E => Array(0x1E8F) // LATIN CAPITAL LETTER Y WITH DOT ABOVE + case 0x1E90 => Array(0x1E91) // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + case 0x1E92 => Array(0x1E93) // LATIN CAPITAL LETTER Z WITH DOT BELOW + case 0x1E94 => Array(0x1E95) // LATIN CAPITAL LETTER Z WITH LINE BELOW + case 0x1E9B => Array(0x1E61) // LATIN SMALL LETTER LONG S WITH DOT ABOVE + case 0x1EA0 => Array(0x1EA1) // LATIN CAPITAL LETTER A WITH DOT BELOW + case 0x1EA2 => Array(0x1EA3) // LATIN CAPITAL LETTER A WITH HOOK ABOVE + case 0x1EA4 => Array(0x1EA5) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1EA6 => Array(0x1EA7) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1EA8 => Array(0x1EA9) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EAA => Array(0x1EAB) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1EAC => Array(0x1EAD) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1EAE => Array(0x1EAF) // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + case 0x1EB0 => Array(0x1EB1) // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + case 0x1EB2 => Array(0x1EB3) // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1EB4 => Array(0x1EB5) // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + case 0x1EB6 => Array(0x1EB7) // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + case 0x1EB8 => Array(0x1EB9) // LATIN CAPITAL LETTER E WITH DOT BELOW + case 0x1EBA 
=> Array(0x1EBB) // LATIN CAPITAL LETTER E WITH HOOK ABOVE + case 0x1EBC => Array(0x1EBD) // LATIN CAPITAL LETTER E WITH TILDE + case 0x1EBE => Array(0x1EBF) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1EC0 => Array(0x1EC1) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1EC2 => Array(0x1EC3) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EC4 => Array(0x1EC5) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1EC6 => Array(0x1EC7) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0x1EC8 => Array(0x1EC9) // LATIN CAPITAL LETTER I WITH HOOK ABOVE + case 0x1ECA => Array(0x1ECB) // LATIN CAPITAL LETTER I WITH DOT BELOW + case 0x1ECC => Array(0x1ECD) // LATIN CAPITAL LETTER O WITH DOT BELOW + case 0x1ECE => Array(0x1ECF) // LATIN CAPITAL LETTER O WITH HOOK ABOVE + case 0x1ED0 => Array(0x1ED1) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ED2 => Array(0x1ED3) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ED4 => Array(0x1ED5) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ED6 => Array(0x1ED7) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ED8 => Array(0x1ED9) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1EDA => Array(0x1EDB) // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + case 0x1EDC => Array(0x1EDD) // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + case 0x1EDE => Array(0x1EDF) // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + case 0x1EE0 => Array(0x1EE1) // LATIN CAPITAL LETTER O WITH HORN AND TILDE + case 0x1EE2 => Array(0x1EE3) // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + case 0x1EE4 => Array(0x1EE5) // LATIN CAPITAL LETTER U WITH DOT BELOW + case 0x1EE6 => Array(0x1EE7) // LATIN CAPITAL LETTER U WITH HOOK ABOVE + case 0x1EE8 => Array(0x1EE9) // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + case 0x1EEA => Array(0x1EEB) // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + case 0x1EEC => Array(0x1EED) // LATIN 
CAPITAL LETTER U WITH HORN AND HOOK ABOVE + case 0x1EEE => Array(0x1EEF) // LATIN CAPITAL LETTER U WITH HORN AND TILDE + case 0x1EF0 => Array(0x1EF1) // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + case 0x1EF2 => Array(0x1EF3) // LATIN CAPITAL LETTER Y WITH GRAVE + case 0x1EF4 => Array(0x1EF5) // LATIN CAPITAL LETTER Y WITH DOT BELOW + case 0x1EF6 => Array(0x1EF7) // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + case 0x1EF8 => Array(0x1EF9) // LATIN CAPITAL LETTER Y WITH TILDE + case 0x1EFA => Array(0x1EFB) // LATIN CAPITAL LETTER MIDDLE-WELSH LL + case 0x1EFC => Array(0x1EFD) // LATIN CAPITAL LETTER MIDDLE-WELSH V + case 0x1EFE => Array(0x1EFF) // LATIN CAPITAL LETTER Y WITH LOOP + case 0x1F08 => Array(0x1F00) // GREEK CAPITAL LETTER ALPHA WITH PSILI + case 0x1F09 => Array(0x1F01) // GREEK CAPITAL LETTER ALPHA WITH DASIA + case 0x1F0A => Array(0x1F02) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + case 0x1F0B => Array(0x1F03) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + case 0x1F0C => Array(0x1F04) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + case 0x1F0D => Array(0x1F05) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + case 0x1F0E => Array(0x1F06) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + case 0x1F0F => Array(0x1F07) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + case 0x1F18 => Array(0x1F10) // GREEK CAPITAL LETTER EPSILON WITH PSILI + case 0x1F19 => Array(0x1F11) // GREEK CAPITAL LETTER EPSILON WITH DASIA + case 0x1F1A => Array(0x1F12) // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA + case 0x1F1B => Array(0x1F13) // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + case 0x1F1C => Array(0x1F14) // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + case 0x1F1D => Array(0x1F15) // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + case 0x1F28 => Array(0x1F20) // GREEK CAPITAL LETTER ETA WITH PSILI + case 0x1F29 => Array(0x1F21) // GREEK CAPITAL LETTER ETA WITH DASIA + case 0x1F2A => Array(0x1F22) // GREEK 
CAPITAL LETTER ETA WITH PSILI AND VARIA + case 0x1F2B => Array(0x1F23) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + case 0x1F2C => Array(0x1F24) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + case 0x1F2D => Array(0x1F25) // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + case 0x1F2E => Array(0x1F26) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + case 0x1F2F => Array(0x1F27) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + case 0x1F38 => Array(0x1F30) // GREEK CAPITAL LETTER IOTA WITH PSILI + case 0x1F39 => Array(0x1F31) // GREEK CAPITAL LETTER IOTA WITH DASIA + case 0x1F3A => Array(0x1F32) // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + case 0x1F3B => Array(0x1F33) // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + case 0x1F3C => Array(0x1F34) // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + case 0x1F3D => Array(0x1F35) // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + case 0x1F3E => Array(0x1F36) // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + case 0x1F3F => Array(0x1F37) // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + case 0x1F48 => Array(0x1F40) // GREEK CAPITAL LETTER OMICRON WITH PSILI + case 0x1F49 => Array(0x1F41) // GREEK CAPITAL LETTER OMICRON WITH DASIA + case 0x1F4A => Array(0x1F42) // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + case 0x1F4B => Array(0x1F43) // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + case 0x1F4C => Array(0x1F44) // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + case 0x1F4D => Array(0x1F45) // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + case 0x1F59 => Array(0x1F51) // GREEK CAPITAL LETTER UPSILON WITH DASIA + case 0x1F5B => Array(0x1F53) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + case 0x1F5D => Array(0x1F55) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + case 0x1F5F => Array(0x1F57) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + case 0x1F68 => Array(0x1F60) // GREEK CAPITAL LETTER OMEGA WITH PSILI + case 0x1F69 => 
Array(0x1F61) // GREEK CAPITAL LETTER OMEGA WITH DASIA + case 0x1F6A => Array(0x1F62) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + case 0x1F6B => Array(0x1F63) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + case 0x1F6C => Array(0x1F64) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + case 0x1F6D => Array(0x1F65) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + case 0x1F6E => Array(0x1F66) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + case 0x1F6F => Array(0x1F67) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + case 0x1FB8 => Array(0x1FB0) // GREEK CAPITAL LETTER ALPHA WITH VRACHY + case 0x1FB9 => Array(0x1FB1) // GREEK CAPITAL LETTER ALPHA WITH MACRON + case 0x1FBA => Array(0x1F70) // GREEK CAPITAL LETTER ALPHA WITH VARIA + case 0x1FBB => Array(0x1F71) // GREEK CAPITAL LETTER ALPHA WITH OXIA + case 0x1FBE => Array(0x03B9) // GREEK PROSGEGRAMMENI + case 0x1FC8 => Array(0x1F72) // GREEK CAPITAL LETTER EPSILON WITH VARIA + case 0x1FC9 => Array(0x1F73) // GREEK CAPITAL LETTER EPSILON WITH OXIA + case 0x1FCA => Array(0x1F74) // GREEK CAPITAL LETTER ETA WITH VARIA + case 0x1FCB => Array(0x1F75) // GREEK CAPITAL LETTER ETA WITH OXIA + case 0x1FD8 => Array(0x1FD0) // GREEK CAPITAL LETTER IOTA WITH VRACHY + case 0x1FD9 => Array(0x1FD1) // GREEK CAPITAL LETTER IOTA WITH MACRON + case 0x1FDA => Array(0x1F76) // GREEK CAPITAL LETTER IOTA WITH VARIA + case 0x1FDB => Array(0x1F77) // GREEK CAPITAL LETTER IOTA WITH OXIA + case 0x1FE8 => Array(0x1FE0) // GREEK CAPITAL LETTER UPSILON WITH VRACHY + case 0x1FE9 => Array(0x1FE1) // GREEK CAPITAL LETTER UPSILON WITH MACRON + case 0x1FEA => Array(0x1F7A) // GREEK CAPITAL LETTER UPSILON WITH VARIA + case 0x1FEB => Array(0x1F7B) // GREEK CAPITAL LETTER UPSILON WITH OXIA + case 0x1FEC => Array(0x1FE5) // GREEK CAPITAL LETTER RHO WITH DASIA + case 0x1FF8 => Array(0x1F78) // GREEK CAPITAL LETTER OMICRON WITH VARIA + case 0x1FF9 => Array(0x1F79) // GREEK CAPITAL LETTER OMICRON WITH OXIA + case 0x1FFA 
=> Array(0x1F7C) // GREEK CAPITAL LETTER OMEGA WITH VARIA + case 0x1FFB => Array(0x1F7D) // GREEK CAPITAL LETTER OMEGA WITH OXIA + case 0x2126 => Array(0x03C9) // OHM SIGN + case 0x212A => Array(0x006B) // KELVIN SIGN + case 0x212B => Array(0x00E5) // ANGSTROM SIGN + case 0x2132 => Array(0x214E) // TURNED CAPITAL F + case 0x2160 => Array(0x2170) // ROMAN NUMERAL ONE + case 0x2161 => Array(0x2171) // ROMAN NUMERAL TWO + case 0x2162 => Array(0x2172) // ROMAN NUMERAL THREE + case 0x2163 => Array(0x2173) // ROMAN NUMERAL FOUR + case 0x2164 => Array(0x2174) // ROMAN NUMERAL FIVE + case 0x2165 => Array(0x2175) // ROMAN NUMERAL SIX + case 0x2166 => Array(0x2176) // ROMAN NUMERAL SEVEN + case 0x2167 => Array(0x2177) // ROMAN NUMERAL EIGHT + case 0x2168 => Array(0x2178) // ROMAN NUMERAL NINE + case 0x2169 => Array(0x2179) // ROMAN NUMERAL TEN + case 0x216A => Array(0x217A) // ROMAN NUMERAL ELEVEN + case 0x216B => Array(0x217B) // ROMAN NUMERAL TWELVE + case 0x216C => Array(0x217C) // ROMAN NUMERAL FIFTY + case 0x216D => Array(0x217D) // ROMAN NUMERAL ONE HUNDRED + case 0x216E => Array(0x217E) // ROMAN NUMERAL FIVE HUNDRED + case 0x216F => Array(0x217F) // ROMAN NUMERAL ONE THOUSAND + case 0x2183 => Array(0x2184) // ROMAN NUMERAL REVERSED ONE HUNDRED + case 0x24B6 => Array(0x24D0) // CIRCLED LATIN CAPITAL LETTER A + case 0x24B7 => Array(0x24D1) // CIRCLED LATIN CAPITAL LETTER B + case 0x24B8 => Array(0x24D2) // CIRCLED LATIN CAPITAL LETTER C + case 0x24B9 => Array(0x24D3) // CIRCLED LATIN CAPITAL LETTER D + case 0x24BA => Array(0x24D4) // CIRCLED LATIN CAPITAL LETTER E + case 0x24BB => Array(0x24D5) // CIRCLED LATIN CAPITAL LETTER F + case 0x24BC => Array(0x24D6) // CIRCLED LATIN CAPITAL LETTER G + case 0x24BD => Array(0x24D7) // CIRCLED LATIN CAPITAL LETTER H + case 0x24BE => Array(0x24D8) // CIRCLED LATIN CAPITAL LETTER I + case 0x24BF => Array(0x24D9) // CIRCLED LATIN CAPITAL LETTER J + case 0x24C0 => Array(0x24DA) // CIRCLED LATIN CAPITAL LETTER K + case 0x24C1 => 
Array(0x24DB) // CIRCLED LATIN CAPITAL LETTER L + case 0x24C2 => Array(0x24DC) // CIRCLED LATIN CAPITAL LETTER M + case 0x24C3 => Array(0x24DD) // CIRCLED LATIN CAPITAL LETTER N + case 0x24C4 => Array(0x24DE) // CIRCLED LATIN CAPITAL LETTER O + case 0x24C5 => Array(0x24DF) // CIRCLED LATIN CAPITAL LETTER P + case 0x24C6 => Array(0x24E0) // CIRCLED LATIN CAPITAL LETTER Q + case 0x24C7 => Array(0x24E1) // CIRCLED LATIN CAPITAL LETTER R + case 0x24C8 => Array(0x24E2) // CIRCLED LATIN CAPITAL LETTER S + case 0x24C9 => Array(0x24E3) // CIRCLED LATIN CAPITAL LETTER T + case 0x24CA => Array(0x24E4) // CIRCLED LATIN CAPITAL LETTER U + case 0x24CB => Array(0x24E5) // CIRCLED LATIN CAPITAL LETTER V + case 0x24CC => Array(0x24E6) // CIRCLED LATIN CAPITAL LETTER W + case 0x24CD => Array(0x24E7) // CIRCLED LATIN CAPITAL LETTER X + case 0x24CE => Array(0x24E8) // CIRCLED LATIN CAPITAL LETTER Y + case 0x24CF => Array(0x24E9) // CIRCLED LATIN CAPITAL LETTER Z + case 0x2C00 => Array(0x2C30) // GLAGOLITIC CAPITAL LETTER AZU + case 0x2C01 => Array(0x2C31) // GLAGOLITIC CAPITAL LETTER BUKY + case 0x2C02 => Array(0x2C32) // GLAGOLITIC CAPITAL LETTER VEDE + case 0x2C03 => Array(0x2C33) // GLAGOLITIC CAPITAL LETTER GLAGOLI + case 0x2C04 => Array(0x2C34) // GLAGOLITIC CAPITAL LETTER DOBRO + case 0x2C05 => Array(0x2C35) // GLAGOLITIC CAPITAL LETTER YESTU + case 0x2C06 => Array(0x2C36) // GLAGOLITIC CAPITAL LETTER ZHIVETE + case 0x2C07 => Array(0x2C37) // GLAGOLITIC CAPITAL LETTER DZELO + case 0x2C08 => Array(0x2C38) // GLAGOLITIC CAPITAL LETTER ZEMLJA + case 0x2C09 => Array(0x2C39) // GLAGOLITIC CAPITAL LETTER IZHE + case 0x2C0A => Array(0x2C3A) // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + case 0x2C0B => Array(0x2C3B) // GLAGOLITIC CAPITAL LETTER I + case 0x2C0C => Array(0x2C3C) // GLAGOLITIC CAPITAL LETTER DJERVI + case 0x2C0D => Array(0x2C3D) // GLAGOLITIC CAPITAL LETTER KAKO + case 0x2C0E => Array(0x2C3E) // GLAGOLITIC CAPITAL LETTER LJUDIJE + case 0x2C0F => Array(0x2C3F) // GLAGOLITIC 
CAPITAL LETTER MYSLITE + case 0x2C10 => Array(0x2C40) // GLAGOLITIC CAPITAL LETTER NASHI + case 0x2C11 => Array(0x2C41) // GLAGOLITIC CAPITAL LETTER ONU + case 0x2C12 => Array(0x2C42) // GLAGOLITIC CAPITAL LETTER POKOJI + case 0x2C13 => Array(0x2C43) // GLAGOLITIC CAPITAL LETTER RITSI + case 0x2C14 => Array(0x2C44) // GLAGOLITIC CAPITAL LETTER SLOVO + case 0x2C15 => Array(0x2C45) // GLAGOLITIC CAPITAL LETTER TVRIDO + case 0x2C16 => Array(0x2C46) // GLAGOLITIC CAPITAL LETTER UKU + case 0x2C17 => Array(0x2C47) // GLAGOLITIC CAPITAL LETTER FRITU + case 0x2C18 => Array(0x2C48) // GLAGOLITIC CAPITAL LETTER HERU + case 0x2C19 => Array(0x2C49) // GLAGOLITIC CAPITAL LETTER OTU + case 0x2C1A => Array(0x2C4A) // GLAGOLITIC CAPITAL LETTER PE + case 0x2C1B => Array(0x2C4B) // GLAGOLITIC CAPITAL LETTER SHTA + case 0x2C1C => Array(0x2C4C) // GLAGOLITIC CAPITAL LETTER TSI + case 0x2C1D => Array(0x2C4D) // GLAGOLITIC CAPITAL LETTER CHRIVI + case 0x2C1E => Array(0x2C4E) // GLAGOLITIC CAPITAL LETTER SHA + case 0x2C1F => Array(0x2C4F) // GLAGOLITIC CAPITAL LETTER YERU + case 0x2C20 => Array(0x2C50) // GLAGOLITIC CAPITAL LETTER YERI + case 0x2C21 => Array(0x2C51) // GLAGOLITIC CAPITAL LETTER YATI + case 0x2C22 => Array(0x2C52) // GLAGOLITIC CAPITAL LETTER SPIDERY HA + case 0x2C23 => Array(0x2C53) // GLAGOLITIC CAPITAL LETTER YU + case 0x2C24 => Array(0x2C54) // GLAGOLITIC CAPITAL LETTER SMALL YUS + case 0x2C25 => Array(0x2C55) // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + case 0x2C26 => Array(0x2C56) // GLAGOLITIC CAPITAL LETTER YO + case 0x2C27 => Array(0x2C57) // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + case 0x2C28 => Array(0x2C58) // GLAGOLITIC CAPITAL LETTER BIG YUS + case 0x2C29 => Array(0x2C59) // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + case 0x2C2A => Array(0x2C5A) // GLAGOLITIC CAPITAL LETTER FITA + case 0x2C2B => Array(0x2C5B) // GLAGOLITIC CAPITAL LETTER IZHITSA + case 0x2C2C => Array(0x2C5C) // GLAGOLITIC CAPITAL LETTER SHTAPIC + case 0x2C2D => Array(0x2C5D) // 
GLAGOLITIC CAPITAL LETTER TROKUTASTI A + case 0x2C2E => Array(0x2C5E) // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + case 0x2C2F => Array(0x2C5F) // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI + case 0x2C60 => Array(0x2C61) // LATIN CAPITAL LETTER L WITH DOUBLE BAR + case 0x2C62 => Array(0x026B) // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + case 0x2C63 => Array(0x1D7D) // LATIN CAPITAL LETTER P WITH STROKE + case 0x2C64 => Array(0x027D) // LATIN CAPITAL LETTER R WITH TAIL + case 0x2C67 => Array(0x2C68) // LATIN CAPITAL LETTER H WITH DESCENDER + case 0x2C69 => Array(0x2C6A) // LATIN CAPITAL LETTER K WITH DESCENDER + case 0x2C6B => Array(0x2C6C) // LATIN CAPITAL LETTER Z WITH DESCENDER + case 0x2C6D => Array(0x0251) // LATIN CAPITAL LETTER ALPHA + case 0x2C6E => Array(0x0271) // LATIN CAPITAL LETTER M WITH HOOK + case 0x2C6F => Array(0x0250) // LATIN CAPITAL LETTER TURNED A + case 0x2C70 => Array(0x0252) // LATIN CAPITAL LETTER TURNED ALPHA + case 0x2C72 => Array(0x2C73) // LATIN CAPITAL LETTER W WITH HOOK + case 0x2C75 => Array(0x2C76) // LATIN CAPITAL LETTER HALF H + case 0x2C7E => Array(0x023F) // LATIN CAPITAL LETTER S WITH SWASH TAIL + case 0x2C7F => Array(0x0240) // LATIN CAPITAL LETTER Z WITH SWASH TAIL + case 0x2C80 => Array(0x2C81) // COPTIC CAPITAL LETTER ALFA + case 0x2C82 => Array(0x2C83) // COPTIC CAPITAL LETTER VIDA + case 0x2C84 => Array(0x2C85) // COPTIC CAPITAL LETTER GAMMA + case 0x2C86 => Array(0x2C87) // COPTIC CAPITAL LETTER DALDA + case 0x2C88 => Array(0x2C89) // COPTIC CAPITAL LETTER EIE + case 0x2C8A => Array(0x2C8B) // COPTIC CAPITAL LETTER SOU + case 0x2C8C => Array(0x2C8D) // COPTIC CAPITAL LETTER ZATA + case 0x2C8E => Array(0x2C8F) // COPTIC CAPITAL LETTER HATE + case 0x2C90 => Array(0x2C91) // COPTIC CAPITAL LETTER THETHE + case 0x2C92 => Array(0x2C93) // COPTIC CAPITAL LETTER IAUDA + case 0x2C94 => Array(0x2C95) // COPTIC CAPITAL LETTER KAPA + case 0x2C96 => Array(0x2C97) // COPTIC CAPITAL LETTER LAULA + case 0x2C98 => Array(0x2C99) // 
COPTIC CAPITAL LETTER MI + case 0x2C9A => Array(0x2C9B) // COPTIC CAPITAL LETTER NI + case 0x2C9C => Array(0x2C9D) // COPTIC CAPITAL LETTER KSI + case 0x2C9E => Array(0x2C9F) // COPTIC CAPITAL LETTER O + case 0x2CA0 => Array(0x2CA1) // COPTIC CAPITAL LETTER PI + case 0x2CA2 => Array(0x2CA3) // COPTIC CAPITAL LETTER RO + case 0x2CA4 => Array(0x2CA5) // COPTIC CAPITAL LETTER SIMA + case 0x2CA6 => Array(0x2CA7) // COPTIC CAPITAL LETTER TAU + case 0x2CA8 => Array(0x2CA9) // COPTIC CAPITAL LETTER UA + case 0x2CAA => Array(0x2CAB) // COPTIC CAPITAL LETTER FI + case 0x2CAC => Array(0x2CAD) // COPTIC CAPITAL LETTER KHI + case 0x2CAE => Array(0x2CAF) // COPTIC CAPITAL LETTER PSI + case 0x2CB0 => Array(0x2CB1) // COPTIC CAPITAL LETTER OOU + case 0x2CB2 => Array(0x2CB3) // COPTIC CAPITAL LETTER DIALECT-P ALEF + case 0x2CB4 => Array(0x2CB5) // COPTIC CAPITAL LETTER OLD COPTIC AIN + case 0x2CB6 => Array(0x2CB7) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + case 0x2CB8 => Array(0x2CB9) // COPTIC CAPITAL LETTER DIALECT-P KAPA + case 0x2CBA => Array(0x2CBB) // COPTIC CAPITAL LETTER DIALECT-P NI + case 0x2CBC => Array(0x2CBD) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + case 0x2CBE => Array(0x2CBF) // COPTIC CAPITAL LETTER OLD COPTIC OOU + case 0x2CC0 => Array(0x2CC1) // COPTIC CAPITAL LETTER SAMPI + case 0x2CC2 => Array(0x2CC3) // COPTIC CAPITAL LETTER CROSSED SHEI + case 0x2CC4 => Array(0x2CC5) // COPTIC CAPITAL LETTER OLD COPTIC SHEI + case 0x2CC6 => Array(0x2CC7) // COPTIC CAPITAL LETTER OLD COPTIC ESH + case 0x2CC8 => Array(0x2CC9) // COPTIC CAPITAL LETTER AKHMIMIC KHEI + case 0x2CCA => Array(0x2CCB) // COPTIC CAPITAL LETTER DIALECT-P HORI + case 0x2CCC => Array(0x2CCD) // COPTIC CAPITAL LETTER OLD COPTIC HORI + case 0x2CCE => Array(0x2CCF) // COPTIC CAPITAL LETTER OLD COPTIC HA + case 0x2CD0 => Array(0x2CD1) // COPTIC CAPITAL LETTER L-SHAPED HA + case 0x2CD2 => Array(0x2CD3) // COPTIC CAPITAL LETTER OLD COPTIC HEI + case 0x2CD4 => Array(0x2CD5) // COPTIC CAPITAL LETTER OLD 
COPTIC HAT + case 0x2CD6 => Array(0x2CD7) // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + case 0x2CD8 => Array(0x2CD9) // COPTIC CAPITAL LETTER OLD COPTIC DJA + case 0x2CDA => Array(0x2CDB) // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + case 0x2CDC => Array(0x2CDD) // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + case 0x2CDE => Array(0x2CDF) // COPTIC CAPITAL LETTER OLD NUBIAN NGI + case 0x2CE0 => Array(0x2CE1) // COPTIC CAPITAL LETTER OLD NUBIAN NYI + case 0x2CE2 => Array(0x2CE3) // COPTIC CAPITAL LETTER OLD NUBIAN WAU + case 0x2CEB => Array(0x2CEC) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + case 0x2CED => Array(0x2CEE) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + case 0x2CF2 => Array(0x2CF3) // COPTIC CAPITAL LETTER BOHAIRIC KHEI + case 0xA640 => Array(0xA641) // CYRILLIC CAPITAL LETTER ZEMLYA + case 0xA642 => Array(0xA643) // CYRILLIC CAPITAL LETTER DZELO + case 0xA644 => Array(0xA645) // CYRILLIC CAPITAL LETTER REVERSED DZE + case 0xA646 => Array(0xA647) // CYRILLIC CAPITAL LETTER IOTA + case 0xA648 => Array(0xA649) // CYRILLIC CAPITAL LETTER DJERV + case 0xA64A => Array(0xA64B) // CYRILLIC CAPITAL LETTER MONOGRAPH UK + case 0xA64C => Array(0xA64D) // CYRILLIC CAPITAL LETTER BROAD OMEGA + case 0xA64E => Array(0xA64F) // CYRILLIC CAPITAL LETTER NEUTRAL YER + case 0xA650 => Array(0xA651) // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + case 0xA652 => Array(0xA653) // CYRILLIC CAPITAL LETTER IOTIFIED YAT + case 0xA654 => Array(0xA655) // CYRILLIC CAPITAL LETTER REVERSED YU + case 0xA656 => Array(0xA657) // CYRILLIC CAPITAL LETTER IOTIFIED A + case 0xA658 => Array(0xA659) // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + case 0xA65A => Array(0xA65B) // CYRILLIC CAPITAL LETTER BLENDED YUS + case 0xA65C => Array(0xA65D) // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + case 0xA65E => Array(0xA65F) // CYRILLIC CAPITAL LETTER YN + case 0xA660 => Array(0xA661) // CYRILLIC CAPITAL LETTER REVERSED TSE + case 0xA662 => Array(0xA663) // CYRILLIC CAPITAL LETTER SOFT DE + case 
0xA664 => Array(0xA665) // CYRILLIC CAPITAL LETTER SOFT EL + case 0xA666 => Array(0xA667) // CYRILLIC CAPITAL LETTER SOFT EM + case 0xA668 => Array(0xA669) // CYRILLIC CAPITAL LETTER MONOCULAR O + case 0xA66A => Array(0xA66B) // CYRILLIC CAPITAL LETTER BINOCULAR O + case 0xA66C => Array(0xA66D) // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + case 0xA680 => Array(0xA681) // CYRILLIC CAPITAL LETTER DWE + case 0xA682 => Array(0xA683) // CYRILLIC CAPITAL LETTER DZWE + case 0xA684 => Array(0xA685) // CYRILLIC CAPITAL LETTER ZHWE + case 0xA686 => Array(0xA687) // CYRILLIC CAPITAL LETTER CCHE + case 0xA688 => Array(0xA689) // CYRILLIC CAPITAL LETTER DZZE + case 0xA68A => Array(0xA68B) // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + case 0xA68C => Array(0xA68D) // CYRILLIC CAPITAL LETTER TWE + case 0xA68E => Array(0xA68F) // CYRILLIC CAPITAL LETTER TSWE + case 0xA690 => Array(0xA691) // CYRILLIC CAPITAL LETTER TSSE + case 0xA692 => Array(0xA693) // CYRILLIC CAPITAL LETTER TCHE + case 0xA694 => Array(0xA695) // CYRILLIC CAPITAL LETTER HWE + case 0xA696 => Array(0xA697) // CYRILLIC CAPITAL LETTER SHWE + case 0xA698 => Array(0xA699) // CYRILLIC CAPITAL LETTER DOUBLE O + case 0xA69A => Array(0xA69B) // CYRILLIC CAPITAL LETTER CROSSED O + case 0xA722 => Array(0xA723) // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + case 0xA724 => Array(0xA725) // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + case 0xA726 => Array(0xA727) // LATIN CAPITAL LETTER HENG + case 0xA728 => Array(0xA729) // LATIN CAPITAL LETTER TZ + case 0xA72A => Array(0xA72B) // LATIN CAPITAL LETTER TRESILLO + case 0xA72C => Array(0xA72D) // LATIN CAPITAL LETTER CUATRILLO + case 0xA72E => Array(0xA72F) // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + case 0xA732 => Array(0xA733) // LATIN CAPITAL LETTER AA + case 0xA734 => Array(0xA735) // LATIN CAPITAL LETTER AO + case 0xA736 => Array(0xA737) // LATIN CAPITAL LETTER AU + case 0xA738 => Array(0xA739) // LATIN CAPITAL LETTER AV + case 0xA73A => Array(0xA73B) // LATIN CAPITAL 
LETTER AV WITH HORIZONTAL BAR + case 0xA73C => Array(0xA73D) // LATIN CAPITAL LETTER AY + case 0xA73E => Array(0xA73F) // LATIN CAPITAL LETTER REVERSED C WITH DOT + case 0xA740 => Array(0xA741) // LATIN CAPITAL LETTER K WITH STROKE + case 0xA742 => Array(0xA743) // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + case 0xA744 => Array(0xA745) // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + case 0xA746 => Array(0xA747) // LATIN CAPITAL LETTER BROKEN L + case 0xA748 => Array(0xA749) // LATIN CAPITAL LETTER L WITH HIGH STROKE + case 0xA74A => Array(0xA74B) // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + case 0xA74C => Array(0xA74D) // LATIN CAPITAL LETTER O WITH LOOP + case 0xA74E => Array(0xA74F) // LATIN CAPITAL LETTER OO + case 0xA750 => Array(0xA751) // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + case 0xA752 => Array(0xA753) // LATIN CAPITAL LETTER P WITH FLOURISH + case 0xA754 => Array(0xA755) // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + case 0xA756 => Array(0xA757) // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + case 0xA758 => Array(0xA759) // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + case 0xA75A => Array(0xA75B) // LATIN CAPITAL LETTER R ROTUNDA + case 0xA75C => Array(0xA75D) // LATIN CAPITAL LETTER RUM ROTUNDA + case 0xA75E => Array(0xA75F) // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + case 0xA760 => Array(0xA761) // LATIN CAPITAL LETTER VY + case 0xA762 => Array(0xA763) // LATIN CAPITAL LETTER VISIGOTHIC Z + case 0xA764 => Array(0xA765) // LATIN CAPITAL LETTER THORN WITH STROKE + case 0xA766 => Array(0xA767) // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + case 0xA768 => Array(0xA769) // LATIN CAPITAL LETTER VEND + case 0xA76A => Array(0xA76B) // LATIN CAPITAL LETTER ET + case 0xA76C => Array(0xA76D) // LATIN CAPITAL LETTER IS + case 0xA76E => Array(0xA76F) // LATIN CAPITAL LETTER CON + case 0xA779 => Array(0xA77A) // LATIN CAPITAL LETTER INSULAR D + case 0xA77B => Array(0xA77C) // LATIN CAPITAL LETTER 
INSULAR F + case 0xA77D => Array(0x1D79) // LATIN CAPITAL LETTER INSULAR G + case 0xA77E => Array(0xA77F) // LATIN CAPITAL LETTER TURNED INSULAR G + case 0xA780 => Array(0xA781) // LATIN CAPITAL LETTER TURNED L + case 0xA782 => Array(0xA783) // LATIN CAPITAL LETTER INSULAR R + case 0xA784 => Array(0xA785) // LATIN CAPITAL LETTER INSULAR S + case 0xA786 => Array(0xA787) // LATIN CAPITAL LETTER INSULAR T + case 0xA78B => Array(0xA78C) // LATIN CAPITAL LETTER SALTILLO + case 0xA78D => Array(0x0265) // LATIN CAPITAL LETTER TURNED H + case 0xA790 => Array(0xA791) // LATIN CAPITAL LETTER N WITH DESCENDER + case 0xA792 => Array(0xA793) // LATIN CAPITAL LETTER C WITH BAR + case 0xA796 => Array(0xA797) // LATIN CAPITAL LETTER B WITH FLOURISH + case 0xA798 => Array(0xA799) // LATIN CAPITAL LETTER F WITH STROKE + case 0xA79A => Array(0xA79B) // LATIN CAPITAL LETTER VOLAPUK AE + case 0xA79C => Array(0xA79D) // LATIN CAPITAL LETTER VOLAPUK OE + case 0xA79E => Array(0xA79F) // LATIN CAPITAL LETTER VOLAPUK UE + case 0xA7A0 => Array(0xA7A1) // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE + case 0xA7A2 => Array(0xA7A3) // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE + case 0xA7A4 => Array(0xA7A5) // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE + case 0xA7A6 => Array(0xA7A7) // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE + case 0xA7A8 => Array(0xA7A9) // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE + case 0xA7AA => Array(0x0266) // LATIN CAPITAL LETTER H WITH HOOK + case 0xA7AB => Array(0x025C) // LATIN CAPITAL LETTER REVERSED OPEN E + case 0xA7AC => Array(0x0261) // LATIN CAPITAL LETTER SCRIPT G + case 0xA7AD => Array(0x026C) // LATIN CAPITAL LETTER L WITH BELT + case 0xA7AE => Array(0x026A) // LATIN CAPITAL LETTER SMALL CAPITAL I + case 0xA7B0 => Array(0x029E) // LATIN CAPITAL LETTER TURNED K + case 0xA7B1 => Array(0x0287) // LATIN CAPITAL LETTER TURNED T + case 0xA7B2 => Array(0x029D) // LATIN CAPITAL LETTER J WITH CROSSED-TAIL + case 0xA7B3 => Array(0xAB53) // LATIN CAPITAL LETTER CHI + 
case 0xA7B4 => Array(0xA7B5) // LATIN CAPITAL LETTER BETA + case 0xA7B6 => Array(0xA7B7) // LATIN CAPITAL LETTER OMEGA + case 0xA7B8 => Array(0xA7B9) // LATIN CAPITAL LETTER U WITH STROKE + case 0xA7BA => Array(0xA7BB) // LATIN CAPITAL LETTER GLOTTAL A + case 0xA7BC => Array(0xA7BD) // LATIN CAPITAL LETTER GLOTTAL I + case 0xA7BE => Array(0xA7BF) // LATIN CAPITAL LETTER GLOTTAL U + case 0xA7C0 => Array(0xA7C1) // LATIN CAPITAL LETTER OLD POLISH O + case 0xA7C2 => Array(0xA7C3) // LATIN CAPITAL LETTER ANGLICANA W + case 0xA7C4 => Array(0xA794) // LATIN CAPITAL LETTER C WITH PALATAL HOOK + case 0xA7C5 => Array(0x0282) // LATIN CAPITAL LETTER S WITH HOOK + case 0xA7C6 => Array(0x1D8E) // LATIN CAPITAL LETTER Z WITH PALATAL HOOK + case 0xA7C7 => Array(0xA7C8) // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY + case 0xA7C9 => Array(0xA7CA) // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY + case 0xA7D0 => Array(0xA7D1) // LATIN CAPITAL LETTER CLOSED INSULAR G + case 0xA7D6 => Array(0xA7D7) // LATIN CAPITAL LETTER MIDDLE SCOTS S + case 0xA7D8 => Array(0xA7D9) // LATIN CAPITAL LETTER SIGMOID S + case 0xA7F5 => Array(0xA7F6) // LATIN CAPITAL LETTER REVERSED HALF H + case 0xAB70 => Array(0x13A0) // CHEROKEE SMALL LETTER A + case 0xAB71 => Array(0x13A1) // CHEROKEE SMALL LETTER E + case 0xAB72 => Array(0x13A2) // CHEROKEE SMALL LETTER I + case 0xAB73 => Array(0x13A3) // CHEROKEE SMALL LETTER O + case 0xAB74 => Array(0x13A4) // CHEROKEE SMALL LETTER U + case 0xAB75 => Array(0x13A5) // CHEROKEE SMALL LETTER V + case 0xAB76 => Array(0x13A6) // CHEROKEE SMALL LETTER GA + case 0xAB77 => Array(0x13A7) // CHEROKEE SMALL LETTER KA + case 0xAB78 => Array(0x13A8) // CHEROKEE SMALL LETTER GE + case 0xAB79 => Array(0x13A9) // CHEROKEE SMALL LETTER GI + case 0xAB7A => Array(0x13AA) // CHEROKEE SMALL LETTER GO + case 0xAB7B => Array(0x13AB) // CHEROKEE SMALL LETTER GU + case 0xAB7C => Array(0x13AC) // CHEROKEE SMALL LETTER GV + case 0xAB7D => Array(0x13AD) // CHEROKEE SMALL LETTER HA 
+ case 0xAB7E => Array(0x13AE) // CHEROKEE SMALL LETTER HE + case 0xAB7F => Array(0x13AF) // CHEROKEE SMALL LETTER HI + case 0xAB80 => Array(0x13B0) // CHEROKEE SMALL LETTER HO + case 0xAB81 => Array(0x13B1) // CHEROKEE SMALL LETTER HU + case 0xAB82 => Array(0x13B2) // CHEROKEE SMALL LETTER HV + case 0xAB83 => Array(0x13B3) // CHEROKEE SMALL LETTER LA + case 0xAB84 => Array(0x13B4) // CHEROKEE SMALL LETTER LE + case 0xAB85 => Array(0x13B5) // CHEROKEE SMALL LETTER LI + case 0xAB86 => Array(0x13B6) // CHEROKEE SMALL LETTER LO + case 0xAB87 => Array(0x13B7) // CHEROKEE SMALL LETTER LU + case 0xAB88 => Array(0x13B8) // CHEROKEE SMALL LETTER LV + case 0xAB89 => Array(0x13B9) // CHEROKEE SMALL LETTER MA + case 0xAB8A => Array(0x13BA) // CHEROKEE SMALL LETTER ME + case 0xAB8B => Array(0x13BB) // CHEROKEE SMALL LETTER MI + case 0xAB8C => Array(0x13BC) // CHEROKEE SMALL LETTER MO + case 0xAB8D => Array(0x13BD) // CHEROKEE SMALL LETTER MU + case 0xAB8E => Array(0x13BE) // CHEROKEE SMALL LETTER NA + case 0xAB8F => Array(0x13BF) // CHEROKEE SMALL LETTER HNA + case 0xAB90 => Array(0x13C0) // CHEROKEE SMALL LETTER NAH + case 0xAB91 => Array(0x13C1) // CHEROKEE SMALL LETTER NE + case 0xAB92 => Array(0x13C2) // CHEROKEE SMALL LETTER NI + case 0xAB93 => Array(0x13C3) // CHEROKEE SMALL LETTER NO + case 0xAB94 => Array(0x13C4) // CHEROKEE SMALL LETTER NU + case 0xAB95 => Array(0x13C5) // CHEROKEE SMALL LETTER NV + case 0xAB96 => Array(0x13C6) // CHEROKEE SMALL LETTER QUA + case 0xAB97 => Array(0x13C7) // CHEROKEE SMALL LETTER QUE + case 0xAB98 => Array(0x13C8) // CHEROKEE SMALL LETTER QUI + case 0xAB99 => Array(0x13C9) // CHEROKEE SMALL LETTER QUO + case 0xAB9A => Array(0x13CA) // CHEROKEE SMALL LETTER QUU + case 0xAB9B => Array(0x13CB) // CHEROKEE SMALL LETTER QUV + case 0xAB9C => Array(0x13CC) // CHEROKEE SMALL LETTER SA + case 0xAB9D => Array(0x13CD) // CHEROKEE SMALL LETTER S + case 0xAB9E => Array(0x13CE) // CHEROKEE SMALL LETTER SE + case 0xAB9F => Array(0x13CF) // CHEROKEE 
SMALL LETTER SI + case 0xABA0 => Array(0x13D0) // CHEROKEE SMALL LETTER SO + case 0xABA1 => Array(0x13D1) // CHEROKEE SMALL LETTER SU + case 0xABA2 => Array(0x13D2) // CHEROKEE SMALL LETTER SV + case 0xABA3 => Array(0x13D3) // CHEROKEE SMALL LETTER DA + case 0xABA4 => Array(0x13D4) // CHEROKEE SMALL LETTER TA + case 0xABA5 => Array(0x13D5) // CHEROKEE SMALL LETTER DE + case 0xABA6 => Array(0x13D6) // CHEROKEE SMALL LETTER TE + case 0xABA7 => Array(0x13D7) // CHEROKEE SMALL LETTER DI + case 0xABA8 => Array(0x13D8) // CHEROKEE SMALL LETTER TI + case 0xABA9 => Array(0x13D9) // CHEROKEE SMALL LETTER DO + case 0xABAA => Array(0x13DA) // CHEROKEE SMALL LETTER DU + case 0xABAB => Array(0x13DB) // CHEROKEE SMALL LETTER DV + case 0xABAC => Array(0x13DC) // CHEROKEE SMALL LETTER DLA + case 0xABAD => Array(0x13DD) // CHEROKEE SMALL LETTER TLA + case 0xABAE => Array(0x13DE) // CHEROKEE SMALL LETTER TLE + case 0xABAF => Array(0x13DF) // CHEROKEE SMALL LETTER TLI + case 0xABB0 => Array(0x13E0) // CHEROKEE SMALL LETTER TLO + case 0xABB1 => Array(0x13E1) // CHEROKEE SMALL LETTER TLU + case 0xABB2 => Array(0x13E2) // CHEROKEE SMALL LETTER TLV + case 0xABB3 => Array(0x13E3) // CHEROKEE SMALL LETTER TSA + case 0xABB4 => Array(0x13E4) // CHEROKEE SMALL LETTER TSE + case 0xABB5 => Array(0x13E5) // CHEROKEE SMALL LETTER TSI + case 0xABB6 => Array(0x13E6) // CHEROKEE SMALL LETTER TSO + case 0xABB7 => Array(0x13E7) // CHEROKEE SMALL LETTER TSU + case 0xABB8 => Array(0x13E8) // CHEROKEE SMALL LETTER TSV + case 0xABB9 => Array(0x13E9) // CHEROKEE SMALL LETTER WA + case 0xABBA => Array(0x13EA) // CHEROKEE SMALL LETTER WE + case 0xABBB => Array(0x13EB) // CHEROKEE SMALL LETTER WI + case 0xABBC => Array(0x13EC) // CHEROKEE SMALL LETTER WO + case 0xABBD => Array(0x13ED) // CHEROKEE SMALL LETTER WU + case 0xABBE => Array(0x13EE) // CHEROKEE SMALL LETTER WV + case 0xABBF => Array(0x13EF) // CHEROKEE SMALL LETTER YA + case 0xFF21 => Array(0xFF41) // FULLWIDTH LATIN CAPITAL LETTER A + case 0xFF22 
=> Array(0xFF42) // FULLWIDTH LATIN CAPITAL LETTER B + case 0xFF23 => Array(0xFF43) // FULLWIDTH LATIN CAPITAL LETTER C + case 0xFF24 => Array(0xFF44) // FULLWIDTH LATIN CAPITAL LETTER D + case 0xFF25 => Array(0xFF45) // FULLWIDTH LATIN CAPITAL LETTER E + case 0xFF26 => Array(0xFF46) // FULLWIDTH LATIN CAPITAL LETTER F + case 0xFF27 => Array(0xFF47) // FULLWIDTH LATIN CAPITAL LETTER G + case 0xFF28 => Array(0xFF48) // FULLWIDTH LATIN CAPITAL LETTER H + case 0xFF29 => Array(0xFF49) // FULLWIDTH LATIN CAPITAL LETTER I + case 0xFF2A => Array(0xFF4A) // FULLWIDTH LATIN CAPITAL LETTER J + case 0xFF2B => Array(0xFF4B) // FULLWIDTH LATIN CAPITAL LETTER K + case 0xFF2C => Array(0xFF4C) // FULLWIDTH LATIN CAPITAL LETTER L + case 0xFF2D => Array(0xFF4D) // FULLWIDTH LATIN CAPITAL LETTER M + case 0xFF2E => Array(0xFF4E) // FULLWIDTH LATIN CAPITAL LETTER N + case 0xFF2F => Array(0xFF4F) // FULLWIDTH LATIN CAPITAL LETTER O + case 0xFF30 => Array(0xFF50) // FULLWIDTH LATIN CAPITAL LETTER P + case 0xFF31 => Array(0xFF51) // FULLWIDTH LATIN CAPITAL LETTER Q + case 0xFF32 => Array(0xFF52) // FULLWIDTH LATIN CAPITAL LETTER R + case 0xFF33 => Array(0xFF53) // FULLWIDTH LATIN CAPITAL LETTER S + case 0xFF34 => Array(0xFF54) // FULLWIDTH LATIN CAPITAL LETTER T + case 0xFF35 => Array(0xFF55) // FULLWIDTH LATIN CAPITAL LETTER U + case 0xFF36 => Array(0xFF56) // FULLWIDTH LATIN CAPITAL LETTER V + case 0xFF37 => Array(0xFF57) // FULLWIDTH LATIN CAPITAL LETTER W + case 0xFF38 => Array(0xFF58) // FULLWIDTH LATIN CAPITAL LETTER X + case 0xFF39 => Array(0xFF59) // FULLWIDTH LATIN CAPITAL LETTER Y + case 0xFF3A => Array(0xFF5A) // FULLWIDTH LATIN CAPITAL LETTER Z + case 0x10400 => Array(0x10428) // DESERET CAPITAL LETTER LONG I + case 0x10401 => Array(0x10429) // DESERET CAPITAL LETTER LONG E + case 0x10402 => Array(0x1042A) // DESERET CAPITAL LETTER LONG A + case 0x10403 => Array(0x1042B) // DESERET CAPITAL LETTER LONG AH + case 0x10404 => Array(0x1042C) // DESERET CAPITAL LETTER LONG O + case 
0x10405 => Array(0x1042D) // DESERET CAPITAL LETTER LONG OO + case 0x10406 => Array(0x1042E) // DESERET CAPITAL LETTER SHORT I + case 0x10407 => Array(0x1042F) // DESERET CAPITAL LETTER SHORT E + case 0x10408 => Array(0x10430) // DESERET CAPITAL LETTER SHORT A + case 0x10409 => Array(0x10431) // DESERET CAPITAL LETTER SHORT AH + case 0x1040A => Array(0x10432) // DESERET CAPITAL LETTER SHORT O + case 0x1040B => Array(0x10433) // DESERET CAPITAL LETTER SHORT OO + case 0x1040C => Array(0x10434) // DESERET CAPITAL LETTER AY + case 0x1040D => Array(0x10435) // DESERET CAPITAL LETTER OW + case 0x1040E => Array(0x10436) // DESERET CAPITAL LETTER WU + case 0x1040F => Array(0x10437) // DESERET CAPITAL LETTER YEE + case 0x10410 => Array(0x10438) // DESERET CAPITAL LETTER H + case 0x10411 => Array(0x10439) // DESERET CAPITAL LETTER PEE + case 0x10412 => Array(0x1043A) // DESERET CAPITAL LETTER BEE + case 0x10413 => Array(0x1043B) // DESERET CAPITAL LETTER TEE + case 0x10414 => Array(0x1043C) // DESERET CAPITAL LETTER DEE + case 0x10415 => Array(0x1043D) // DESERET CAPITAL LETTER CHEE + case 0x10416 => Array(0x1043E) // DESERET CAPITAL LETTER JEE + case 0x10417 => Array(0x1043F) // DESERET CAPITAL LETTER KAY + case 0x10418 => Array(0x10440) // DESERET CAPITAL LETTER GAY + case 0x10419 => Array(0x10441) // DESERET CAPITAL LETTER EF + case 0x1041A => Array(0x10442) // DESERET CAPITAL LETTER VEE + case 0x1041B => Array(0x10443) // DESERET CAPITAL LETTER ETH + case 0x1041C => Array(0x10444) // DESERET CAPITAL LETTER THEE + case 0x1041D => Array(0x10445) // DESERET CAPITAL LETTER ES + case 0x1041E => Array(0x10446) // DESERET CAPITAL LETTER ZEE + case 0x1041F => Array(0x10447) // DESERET CAPITAL LETTER ESH + case 0x10420 => Array(0x10448) // DESERET CAPITAL LETTER ZHEE + case 0x10421 => Array(0x10449) // DESERET CAPITAL LETTER ER + case 0x10422 => Array(0x1044A) // DESERET CAPITAL LETTER EL + case 0x10423 => Array(0x1044B) // DESERET CAPITAL LETTER EM + case 0x10424 => 
Array(0x1044C) // DESERET CAPITAL LETTER EN + case 0x10425 => Array(0x1044D) // DESERET CAPITAL LETTER ENG + case 0x10426 => Array(0x1044E) // DESERET CAPITAL LETTER OI + case 0x10427 => Array(0x1044F) // DESERET CAPITAL LETTER EW + case 0x104B0 => Array(0x104D8) // OSAGE CAPITAL LETTER A + case 0x104B1 => Array(0x104D9) // OSAGE CAPITAL LETTER AI + case 0x104B2 => Array(0x104DA) // OSAGE CAPITAL LETTER AIN + case 0x104B3 => Array(0x104DB) // OSAGE CAPITAL LETTER AH + case 0x104B4 => Array(0x104DC) // OSAGE CAPITAL LETTER BRA + case 0x104B5 => Array(0x104DD) // OSAGE CAPITAL LETTER CHA + case 0x104B6 => Array(0x104DE) // OSAGE CAPITAL LETTER EHCHA + case 0x104B7 => Array(0x104DF) // OSAGE CAPITAL LETTER E + case 0x104B8 => Array(0x104E0) // OSAGE CAPITAL LETTER EIN + case 0x104B9 => Array(0x104E1) // OSAGE CAPITAL LETTER HA + case 0x104BA => Array(0x104E2) // OSAGE CAPITAL LETTER HYA + case 0x104BB => Array(0x104E3) // OSAGE CAPITAL LETTER I + case 0x104BC => Array(0x104E4) // OSAGE CAPITAL LETTER KA + case 0x104BD => Array(0x104E5) // OSAGE CAPITAL LETTER EHKA + case 0x104BE => Array(0x104E6) // OSAGE CAPITAL LETTER KYA + case 0x104BF => Array(0x104E7) // OSAGE CAPITAL LETTER LA + case 0x104C0 => Array(0x104E8) // OSAGE CAPITAL LETTER MA + case 0x104C1 => Array(0x104E9) // OSAGE CAPITAL LETTER NA + case 0x104C2 => Array(0x104EA) // OSAGE CAPITAL LETTER O + case 0x104C3 => Array(0x104EB) // OSAGE CAPITAL LETTER OIN + case 0x104C4 => Array(0x104EC) // OSAGE CAPITAL LETTER PA + case 0x104C5 => Array(0x104ED) // OSAGE CAPITAL LETTER EHPA + case 0x104C6 => Array(0x104EE) // OSAGE CAPITAL LETTER SA + case 0x104C7 => Array(0x104EF) // OSAGE CAPITAL LETTER SHA + case 0x104C8 => Array(0x104F0) // OSAGE CAPITAL LETTER TA + case 0x104C9 => Array(0x104F1) // OSAGE CAPITAL LETTER EHTA + case 0x104CA => Array(0x104F2) // OSAGE CAPITAL LETTER TSA + case 0x104CB => Array(0x104F3) // OSAGE CAPITAL LETTER EHTSA + case 0x104CC => Array(0x104F4) // OSAGE CAPITAL LETTER TSHA + case 
0x104CD => Array(0x104F5) // OSAGE CAPITAL LETTER DHA + case 0x104CE => Array(0x104F6) // OSAGE CAPITAL LETTER U + case 0x104CF => Array(0x104F7) // OSAGE CAPITAL LETTER WA + case 0x104D0 => Array(0x104F8) // OSAGE CAPITAL LETTER KHA + case 0x104D1 => Array(0x104F9) // OSAGE CAPITAL LETTER GHA + case 0x104D2 => Array(0x104FA) // OSAGE CAPITAL LETTER ZA + case 0x104D3 => Array(0x104FB) // OSAGE CAPITAL LETTER ZHA + case 0x10570 => Array(0x10597) // VITHKUQI CAPITAL LETTER A + case 0x10571 => Array(0x10598) // VITHKUQI CAPITAL LETTER BBE + case 0x10572 => Array(0x10599) // VITHKUQI CAPITAL LETTER BE + case 0x10573 => Array(0x1059A) // VITHKUQI CAPITAL LETTER CE + case 0x10574 => Array(0x1059B) // VITHKUQI CAPITAL LETTER CHE + case 0x10575 => Array(0x1059C) // VITHKUQI CAPITAL LETTER DE + case 0x10576 => Array(0x1059D) // VITHKUQI CAPITAL LETTER DHE + case 0x10577 => Array(0x1059E) // VITHKUQI CAPITAL LETTER EI + case 0x10578 => Array(0x1059F) // VITHKUQI CAPITAL LETTER E + case 0x10579 => Array(0x105A0) // VITHKUQI CAPITAL LETTER FE + case 0x1057A => Array(0x105A1) // VITHKUQI CAPITAL LETTER GA + case 0x1057C => Array(0x105A3) // VITHKUQI CAPITAL LETTER HA + case 0x1057D => Array(0x105A4) // VITHKUQI CAPITAL LETTER HHA + case 0x1057E => Array(0x105A5) // VITHKUQI CAPITAL LETTER I + case 0x1057F => Array(0x105A6) // VITHKUQI CAPITAL LETTER IJE + case 0x10580 => Array(0x105A7) // VITHKUQI CAPITAL LETTER JE + case 0x10581 => Array(0x105A8) // VITHKUQI CAPITAL LETTER KA + case 0x10582 => Array(0x105A9) // VITHKUQI CAPITAL LETTER LA + case 0x10583 => Array(0x105AA) // VITHKUQI CAPITAL LETTER LLA + case 0x10584 => Array(0x105AB) // VITHKUQI CAPITAL LETTER ME + case 0x10585 => Array(0x105AC) // VITHKUQI CAPITAL LETTER NE + case 0x10586 => Array(0x105AD) // VITHKUQI CAPITAL LETTER NJE + case 0x10587 => Array(0x105AE) // VITHKUQI CAPITAL LETTER O + case 0x10588 => Array(0x105AF) // VITHKUQI CAPITAL LETTER PE + case 0x10589 => Array(0x105B0) // VITHKUQI CAPITAL LETTER QA + 
case 0x1058A => Array(0x105B1) // VITHKUQI CAPITAL LETTER RE + case 0x1058C => Array(0x105B3) // VITHKUQI CAPITAL LETTER SE + case 0x1058D => Array(0x105B4) // VITHKUQI CAPITAL LETTER SHE + case 0x1058E => Array(0x105B5) // VITHKUQI CAPITAL LETTER TE + case 0x1058F => Array(0x105B6) // VITHKUQI CAPITAL LETTER THE + case 0x10590 => Array(0x105B7) // VITHKUQI CAPITAL LETTER U + case 0x10591 => Array(0x105B8) // VITHKUQI CAPITAL LETTER VE + case 0x10592 => Array(0x105B9) // VITHKUQI CAPITAL LETTER XE + case 0x10594 => Array(0x105BB) // VITHKUQI CAPITAL LETTER Y + case 0x10595 => Array(0x105BC) // VITHKUQI CAPITAL LETTER ZE + case 0x10C80 => Array(0x10CC0) // OLD HUNGARIAN CAPITAL LETTER A + case 0x10C81 => Array(0x10CC1) // OLD HUNGARIAN CAPITAL LETTER AA + case 0x10C82 => Array(0x10CC2) // OLD HUNGARIAN CAPITAL LETTER EB + case 0x10C83 => Array(0x10CC3) // OLD HUNGARIAN CAPITAL LETTER AMB + case 0x10C84 => Array(0x10CC4) // OLD HUNGARIAN CAPITAL LETTER EC + case 0x10C85 => Array(0x10CC5) // OLD HUNGARIAN CAPITAL LETTER ENC + case 0x10C86 => Array(0x10CC6) // OLD HUNGARIAN CAPITAL LETTER ECS + case 0x10C87 => Array(0x10CC7) // OLD HUNGARIAN CAPITAL LETTER ED + case 0x10C88 => Array(0x10CC8) // OLD HUNGARIAN CAPITAL LETTER AND + case 0x10C89 => Array(0x10CC9) // OLD HUNGARIAN CAPITAL LETTER E + case 0x10C8A => Array(0x10CCA) // OLD HUNGARIAN CAPITAL LETTER CLOSE E + case 0x10C8B => Array(0x10CCB) // OLD HUNGARIAN CAPITAL LETTER EE + case 0x10C8C => Array(0x10CCC) // OLD HUNGARIAN CAPITAL LETTER EF + case 0x10C8D => Array(0x10CCD) // OLD HUNGARIAN CAPITAL LETTER EG + case 0x10C8E => Array(0x10CCE) // OLD HUNGARIAN CAPITAL LETTER EGY + case 0x10C8F => Array(0x10CCF) // OLD HUNGARIAN CAPITAL LETTER EH + case 0x10C90 => Array(0x10CD0) // OLD HUNGARIAN CAPITAL LETTER I + case 0x10C91 => Array(0x10CD1) // OLD HUNGARIAN CAPITAL LETTER II + case 0x10C92 => Array(0x10CD2) // OLD HUNGARIAN CAPITAL LETTER EJ + case 0x10C93 => Array(0x10CD3) // OLD HUNGARIAN CAPITAL LETTER EK + 
case 0x10C94 => Array(0x10CD4) // OLD HUNGARIAN CAPITAL LETTER AK + case 0x10C95 => Array(0x10CD5) // OLD HUNGARIAN CAPITAL LETTER UNK + case 0x10C96 => Array(0x10CD6) // OLD HUNGARIAN CAPITAL LETTER EL + case 0x10C97 => Array(0x10CD7) // OLD HUNGARIAN CAPITAL LETTER ELY + case 0x10C98 => Array(0x10CD8) // OLD HUNGARIAN CAPITAL LETTER EM + case 0x10C99 => Array(0x10CD9) // OLD HUNGARIAN CAPITAL LETTER EN + case 0x10C9A => Array(0x10CDA) // OLD HUNGARIAN CAPITAL LETTER ENY + case 0x10C9B => Array(0x10CDB) // OLD HUNGARIAN CAPITAL LETTER O + case 0x10C9C => Array(0x10CDC) // OLD HUNGARIAN CAPITAL LETTER OO + case 0x10C9D => Array(0x10CDD) // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE + case 0x10C9E => Array(0x10CDE) // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE + case 0x10C9F => Array(0x10CDF) // OLD HUNGARIAN CAPITAL LETTER OEE + case 0x10CA0 => Array(0x10CE0) // OLD HUNGARIAN CAPITAL LETTER EP + case 0x10CA1 => Array(0x10CE1) // OLD HUNGARIAN CAPITAL LETTER EMP + case 0x10CA2 => Array(0x10CE2) // OLD HUNGARIAN CAPITAL LETTER ER + case 0x10CA3 => Array(0x10CE3) // OLD HUNGARIAN CAPITAL LETTER SHORT ER + case 0x10CA4 => Array(0x10CE4) // OLD HUNGARIAN CAPITAL LETTER ES + case 0x10CA5 => Array(0x10CE5) // OLD HUNGARIAN CAPITAL LETTER ESZ + case 0x10CA6 => Array(0x10CE6) // OLD HUNGARIAN CAPITAL LETTER ET + case 0x10CA7 => Array(0x10CE7) // OLD HUNGARIAN CAPITAL LETTER ENT + case 0x10CA8 => Array(0x10CE8) // OLD HUNGARIAN CAPITAL LETTER ETY + case 0x10CA9 => Array(0x10CE9) // OLD HUNGARIAN CAPITAL LETTER ECH + case 0x10CAA => Array(0x10CEA) // OLD HUNGARIAN CAPITAL LETTER U + case 0x10CAB => Array(0x10CEB) // OLD HUNGARIAN CAPITAL LETTER UU + case 0x10CAC => Array(0x10CEC) // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE + case 0x10CAD => Array(0x10CED) // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE + case 0x10CAE => Array(0x10CEE) // OLD HUNGARIAN CAPITAL LETTER EV + case 0x10CAF => Array(0x10CEF) // OLD HUNGARIAN CAPITAL LETTER EZ + case 0x10CB0 => Array(0x10CF0) // OLD 
HUNGARIAN CAPITAL LETTER EZS + case 0x10CB1 => Array(0x10CF1) // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN + case 0x10CB2 => Array(0x10CF2) // OLD HUNGARIAN CAPITAL LETTER US + case 0x118A0 => Array(0x118C0) // WARANG CITI CAPITAL LETTER NGAA + case 0x118A1 => Array(0x118C1) // WARANG CITI CAPITAL LETTER A + case 0x118A2 => Array(0x118C2) // WARANG CITI CAPITAL LETTER WI + case 0x118A3 => Array(0x118C3) // WARANG CITI CAPITAL LETTER YU + case 0x118A4 => Array(0x118C4) // WARANG CITI CAPITAL LETTER YA + case 0x118A5 => Array(0x118C5) // WARANG CITI CAPITAL LETTER YO + case 0x118A6 => Array(0x118C6) // WARANG CITI CAPITAL LETTER II + case 0x118A7 => Array(0x118C7) // WARANG CITI CAPITAL LETTER UU + case 0x118A8 => Array(0x118C8) // WARANG CITI CAPITAL LETTER E + case 0x118A9 => Array(0x118C9) // WARANG CITI CAPITAL LETTER O + case 0x118AA => Array(0x118CA) // WARANG CITI CAPITAL LETTER ANG + case 0x118AB => Array(0x118CB) // WARANG CITI CAPITAL LETTER GA + case 0x118AC => Array(0x118CC) // WARANG CITI CAPITAL LETTER KO + case 0x118AD => Array(0x118CD) // WARANG CITI CAPITAL LETTER ENY + case 0x118AE => Array(0x118CE) // WARANG CITI CAPITAL LETTER YUJ + case 0x118AF => Array(0x118CF) // WARANG CITI CAPITAL LETTER UC + case 0x118B0 => Array(0x118D0) // WARANG CITI CAPITAL LETTER ENN + case 0x118B1 => Array(0x118D1) // WARANG CITI CAPITAL LETTER ODD + case 0x118B2 => Array(0x118D2) // WARANG CITI CAPITAL LETTER TTE + case 0x118B3 => Array(0x118D3) // WARANG CITI CAPITAL LETTER NUNG + case 0x118B4 => Array(0x118D4) // WARANG CITI CAPITAL LETTER DA + case 0x118B5 => Array(0x118D5) // WARANG CITI CAPITAL LETTER AT + case 0x118B6 => Array(0x118D6) // WARANG CITI CAPITAL LETTER AM + case 0x118B7 => Array(0x118D7) // WARANG CITI CAPITAL LETTER BU + case 0x118B8 => Array(0x118D8) // WARANG CITI CAPITAL LETTER PU + case 0x118B9 => Array(0x118D9) // WARANG CITI CAPITAL LETTER HIYO + case 0x118BA => Array(0x118DA) // WARANG CITI CAPITAL LETTER HOLO + case 0x118BB => 
Array(0x118DB) // WARANG CITI CAPITAL LETTER HORR + case 0x118BC => Array(0x118DC) // WARANG CITI CAPITAL LETTER HAR + case 0x118BD => Array(0x118DD) // WARANG CITI CAPITAL LETTER SSUU + case 0x118BE => Array(0x118DE) // WARANG CITI CAPITAL LETTER SII + case 0x118BF => Array(0x118DF) // WARANG CITI CAPITAL LETTER VIYO + case 0x16E40 => Array(0x16E60) // MEDEFAIDRIN CAPITAL LETTER M + case 0x16E41 => Array(0x16E61) // MEDEFAIDRIN CAPITAL LETTER S + case 0x16E42 => Array(0x16E62) // MEDEFAIDRIN CAPITAL LETTER V + case 0x16E43 => Array(0x16E63) // MEDEFAIDRIN CAPITAL LETTER W + case 0x16E44 => Array(0x16E64) // MEDEFAIDRIN CAPITAL LETTER ATIU + case 0x16E45 => Array(0x16E65) // MEDEFAIDRIN CAPITAL LETTER Z + case 0x16E46 => Array(0x16E66) // MEDEFAIDRIN CAPITAL LETTER KP + case 0x16E47 => Array(0x16E67) // MEDEFAIDRIN CAPITAL LETTER P + case 0x16E48 => Array(0x16E68) // MEDEFAIDRIN CAPITAL LETTER T + case 0x16E49 => Array(0x16E69) // MEDEFAIDRIN CAPITAL LETTER G + case 0x16E4A => Array(0x16E6A) // MEDEFAIDRIN CAPITAL LETTER F + case 0x16E4B => Array(0x16E6B) // MEDEFAIDRIN CAPITAL LETTER I + case 0x16E4C => Array(0x16E6C) // MEDEFAIDRIN CAPITAL LETTER K + case 0x16E4D => Array(0x16E6D) // MEDEFAIDRIN CAPITAL LETTER A + case 0x16E4E => Array(0x16E6E) // MEDEFAIDRIN CAPITAL LETTER J + case 0x16E4F => Array(0x16E6F) // MEDEFAIDRIN CAPITAL LETTER E + case 0x16E50 => Array(0x16E70) // MEDEFAIDRIN CAPITAL LETTER B + case 0x16E51 => Array(0x16E71) // MEDEFAIDRIN CAPITAL LETTER C + case 0x16E52 => Array(0x16E72) // MEDEFAIDRIN CAPITAL LETTER U + case 0x16E53 => Array(0x16E73) // MEDEFAIDRIN CAPITAL LETTER YU + case 0x16E54 => Array(0x16E74) // MEDEFAIDRIN CAPITAL LETTER L + case 0x16E55 => Array(0x16E75) // MEDEFAIDRIN CAPITAL LETTER Q + case 0x16E56 => Array(0x16E76) // MEDEFAIDRIN CAPITAL LETTER HP + case 0x16E57 => Array(0x16E77) // MEDEFAIDRIN CAPITAL LETTER NY + case 0x16E58 => Array(0x16E78) // MEDEFAIDRIN CAPITAL LETTER X + case 0x16E59 => Array(0x16E79) // MEDEFAIDRIN 
CAPITAL LETTER D + case 0x16E5A => Array(0x16E7A) // MEDEFAIDRIN CAPITAL LETTER OE + case 0x16E5B => Array(0x16E7B) // MEDEFAIDRIN CAPITAL LETTER N + case 0x16E5C => Array(0x16E7C) // MEDEFAIDRIN CAPITAL LETTER R + case 0x16E5D => Array(0x16E7D) // MEDEFAIDRIN CAPITAL LETTER O + case 0x16E5E => Array(0x16E7E) // MEDEFAIDRIN CAPITAL LETTER AI + case 0x16E5F => Array(0x16E7F) // MEDEFAIDRIN CAPITAL LETTER Y + case 0x1E900 => Array(0x1E922) // ADLAM CAPITAL LETTER ALIF + case 0x1E901 => Array(0x1E923) // ADLAM CAPITAL LETTER DAALI + case 0x1E902 => Array(0x1E924) // ADLAM CAPITAL LETTER LAAM + case 0x1E903 => Array(0x1E925) // ADLAM CAPITAL LETTER MIIM + case 0x1E904 => Array(0x1E926) // ADLAM CAPITAL LETTER BA + case 0x1E905 => Array(0x1E927) // ADLAM CAPITAL LETTER SINNYIIYHE + case 0x1E906 => Array(0x1E928) // ADLAM CAPITAL LETTER PE + case 0x1E907 => Array(0x1E929) // ADLAM CAPITAL LETTER BHE + case 0x1E908 => Array(0x1E92A) // ADLAM CAPITAL LETTER RA + case 0x1E909 => Array(0x1E92B) // ADLAM CAPITAL LETTER E + case 0x1E90A => Array(0x1E92C) // ADLAM CAPITAL LETTER FA + case 0x1E90B => Array(0x1E92D) // ADLAM CAPITAL LETTER I + case 0x1E90C => Array(0x1E92E) // ADLAM CAPITAL LETTER O + case 0x1E90D => Array(0x1E92F) // ADLAM CAPITAL LETTER DHA + case 0x1E90E => Array(0x1E930) // ADLAM CAPITAL LETTER YHE + case 0x1E90F => Array(0x1E931) // ADLAM CAPITAL LETTER WAW + case 0x1E910 => Array(0x1E932) // ADLAM CAPITAL LETTER NUN + case 0x1E911 => Array(0x1E933) // ADLAM CAPITAL LETTER KAF + case 0x1E912 => Array(0x1E934) // ADLAM CAPITAL LETTER YA + case 0x1E913 => Array(0x1E935) // ADLAM CAPITAL LETTER U + case 0x1E914 => Array(0x1E936) // ADLAM CAPITAL LETTER JIIM + case 0x1E915 => Array(0x1E937) // ADLAM CAPITAL LETTER CHI + case 0x1E916 => Array(0x1E938) // ADLAM CAPITAL LETTER HA + case 0x1E917 => Array(0x1E939) // ADLAM CAPITAL LETTER QAAF + case 0x1E918 => Array(0x1E93A) // ADLAM CAPITAL LETTER GA + case 0x1E919 => Array(0x1E93B) // ADLAM CAPITAL LETTER NYA + 
case 0x1E91A => Array(0x1E93C) // ADLAM CAPITAL LETTER TU + case 0x1E91B => Array(0x1E93D) // ADLAM CAPITAL LETTER NHA + case 0x1E91C => Array(0x1E93E) // ADLAM CAPITAL LETTER VA + case 0x1E91D => Array(0x1E93F) // ADLAM CAPITAL LETTER KHA + case 0x1E91E => Array(0x1E940) // ADLAM CAPITAL LETTER GBE + case 0x1E91F => Array(0x1E941) // ADLAM CAPITAL LETTER ZAL + case 0x1E920 => Array(0x1E942) // ADLAM CAPITAL LETTER KPO + case 0x1E921 => Array(0x1E943) // ADLAM CAPITAL LETTER SHA + case 0x00DF => Array(0x0073, 0x0073) // LATIN SMALL LETTER SHARP S + case 0x0130 => Array(0x0069, 0x0307) // LATIN CAPITAL LETTER I WITH DOT ABOVE + case 0x0149 => Array(0x02BC, 0x006E) // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE + case 0x01F0 => Array(0x006A, 0x030C) // LATIN SMALL LETTER J WITH CARON + case 0x0390 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + case 0x03B0 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + case 0x0587 => Array(0x0565, 0x0582) // ARMENIAN SMALL LIGATURE ECH YIWN + case 0x1E96 => Array(0x0068, 0x0331) // LATIN SMALL LETTER H WITH LINE BELOW + case 0x1E97 => Array(0x0074, 0x0308) // LATIN SMALL LETTER T WITH DIAERESIS + case 0x1E98 => Array(0x0077, 0x030A) // LATIN SMALL LETTER W WITH RING ABOVE + case 0x1E99 => Array(0x0079, 0x030A) // LATIN SMALL LETTER Y WITH RING ABOVE + case 0x1E9A => Array(0x0061, 0x02BE) // LATIN SMALL LETTER A WITH RIGHT HALF RING + case 0x1E9E => Array(0x0073, 0x0073) // LATIN CAPITAL LETTER SHARP S + case 0x1F50 => Array(0x03C5, 0x0313) // GREEK SMALL LETTER UPSILON WITH PSILI + case 0x1F52 => Array(0x03C5, 0x0313, 0x0300) // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + case 0x1F54 => Array(0x03C5, 0x0313, 0x0301) // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + case 0x1F56 => Array(0x03C5, 0x0313, 0x0342) // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + case 0x1F80 => Array(0x1F00, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND 
YPOGEGRAMMENI + case 0x1F81 => Array(0x1F01, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + case 0x1F82 => Array(0x1F02, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F83 => Array(0x1F03, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1F84 => Array(0x1F04, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F85 => Array(0x1F05, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F86 => Array(0x1F06, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F87 => Array(0x1F07, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F88 => Array(0x1F00, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1F89 => Array(0x1F01, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1F8A => Array(0x1F02, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F8B => Array(0x1F03, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F8C => Array(0x1F04, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F8D => Array(0x1F05, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F8E => Array(0x1F06, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F8F => Array(0x1F07, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F90 => Array(0x1F20, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + case 0x1F91 => Array(0x1F21, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + case 0x1F92 => Array(0x1F22, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1F93 => Array(0x1F23, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND 
YPOGEGRAMMENI + case 0x1F94 => Array(0x1F24, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1F95 => Array(0x1F25, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1F96 => Array(0x1F26, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F97 => Array(0x1F27, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1F98 => Array(0x1F20, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1F99 => Array(0x1F21, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1F9A => Array(0x1F22, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F9B => Array(0x1F23, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F9C => Array(0x1F24, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F9D => Array(0x1F25, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F9E => Array(0x1F26, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F9F => Array(0x1F27, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FA0 => Array(0x1F60, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + case 0x1FA1 => Array(0x1F61, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + case 0x1FA2 => Array(0x1F62, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1FA3 => Array(0x1F63, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1FA4 => Array(0x1F64, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1FA5 => Array(0x1F65, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1FA6 => Array(0x1F66, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI + case 0x1FA7 => Array(0x1F67, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1FA8 => Array(0x1F60, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1FA9 => Array(0x1F61, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1FAA => Array(0x1F62, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1FAB => Array(0x1F63, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1FAC => Array(0x1F64, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1FAD => Array(0x1F65, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1FAE => Array(0x1F66, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FAF => Array(0x1F67, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FB2 => Array(0x1F70, 0x03B9) // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + case 0x1FB3 => Array(0x03B1, 0x03B9) // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI + case 0x1FB4 => Array(0x03AC, 0x03B9) // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + case 0x1FB6 => Array(0x03B1, 0x0342) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + case 0x1FB7 => Array(0x03B1, 0x0342, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FBC => Array(0x03B1, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1FC2 => Array(0x1F74, 0x03B9) // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + case 0x1FC3 => Array(0x03B7, 0x03B9) // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + case 0x1FC4 => Array(0x03AE, 0x03B9) // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + case 0x1FC6 => Array(0x03B7, 0x0342) // GREEK SMALL LETTER ETA WITH PERISPOMENI + case 0x1FC7 => Array(0x03B7, 0x0342, 0x03B9) // GREEK SMALL LETTER ETA WITH PERISPOMENI AND 
YPOGEGRAMMENI + case 0x1FCC => Array(0x03B7, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1FD2 => Array(0x03B9, 0x0308, 0x0300) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + case 0x1FD3 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + case 0x1FD6 => Array(0x03B9, 0x0342) // GREEK SMALL LETTER IOTA WITH PERISPOMENI + case 0x1FD7 => Array(0x03B9, 0x0308, 0x0342) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + case 0x1FE2 => Array(0x03C5, 0x0308, 0x0300) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + case 0x1FE3 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + case 0x1FE4 => Array(0x03C1, 0x0313) // GREEK SMALL LETTER RHO WITH PSILI + case 0x1FE6 => Array(0x03C5, 0x0342) // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + case 0x1FE7 => Array(0x03C5, 0x0308, 0x0342) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + case 0x1FF2 => Array(0x1F7C, 0x03B9) // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + case 0x1FF3 => Array(0x03C9, 0x03B9) // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + case 0x1FF4 => Array(0x03CE, 0x03B9) // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + case 0x1FF6 => Array(0x03C9, 0x0342) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + case 0x1FF7 => Array(0x03C9, 0x0342, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1FFC => Array(0x03C9, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case 0xFB00 => Array(0x0066, 0x0066) // LATIN SMALL LIGATURE FF + case 0xFB01 => Array(0x0066, 0x0069) // LATIN SMALL LIGATURE FI + case 0xFB02 => Array(0x0066, 0x006C) // LATIN SMALL LIGATURE FL + case 0xFB03 => Array(0x0066, 0x0066, 0x0069) // LATIN SMALL LIGATURE FFI + case 0xFB04 => Array(0x0066, 0x0066, 0x006C) // LATIN SMALL LIGATURE FFL + case 0xFB05 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE LONG S T + case 0xFB06 => Array(0x0073, 0x0074) // LATIN SMALL 
LIGATURE ST + case 0xFB13 => Array(0x0574, 0x0576) // ARMENIAN SMALL LIGATURE MEN NOW + case 0xFB14 => Array(0x0574, 0x0565) // ARMENIAN SMALL LIGATURE MEN ECH + case 0xFB15 => Array(0x0574, 0x056B) // ARMENIAN SMALL LIGATURE MEN INI + case 0xFB16 => Array(0x057E, 0x0576) // ARMENIAN SMALL LIGATURE VEW NOW + case 0xFB17 => Array(0x0574, 0x056D) // ARMENIAN SMALL LIGATURE MEN XEH + case _ => Array(codePoint) // All others map to themselves + } +} diff --git a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala index c7ea175..dfec161 100644 --- a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala +++ b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala @@ -18,8 +18,10 @@ package org.typelevel.ci package testing import java.util.Locale -import org.scalacheck.{Arbitrary, Cogen, Gen} +import org.scalacheck.{Arbitrary, Cogen, Gen, Shrink} import org.scalacheck.Arbitrary.arbitrary +import scala.annotation.tailrec +import scala.collection.immutable.BitSet object arbitraries { implicit val arbitraryForOrgTypelevelCiCIString: Arbitrary[CIString] = { @@ -34,9 +36,71 @@ object arbitraries { val lowers = chars.filter(_.isLower) val uppers = chars.filter(_.isUpper) val genChar = Gen.oneOf(weirdCharFolds, weirdStringFolds, lowers, uppers, arbitrary[Char]) - Arbitrary(Gen.listOf(genChar).map(cs => CIString(cs.mkString))) + + val surrogatePairStrings: Gen[String] = + // Any Unicode codepoint >= 0x10000 is represented on the JVM by a + // surrogate pair of two character values. 
+ Gen.choose(0x10000, 0x10ffff).map(codePoint => + new String(Array(codePoint), 0, 1) + ) + + val titleCaseStrings: Gen[String] = { + @tailrec + def loop(acc: BitSet, codePoint: Int): BitSet = + if (codePoint > 0x10ffff) { + acc + } else { + if (Character.isTitleCase(codePoint)) { + loop(acc + codePoint, codePoint + 1) + } else { + loop(acc, codePoint + 1) + } + } + + Gen.oneOf(loop(BitSet.empty, 0)).map(codePoint => new String(Array(codePoint), 0, 1)) + } + + Arbitrary( + Gen.oneOf( + Gen.listOf(genChar).map(cs => CIString(cs.mkString)), + arbitrary[String].map(CIString.apply), + surrogatePairStrings.map(CIString.apply), + titleCaseStrings.map(CIString.apply) + ) + ) + } + + implicit val shrinkForCIString: Shrink[CIString] = { + val stringShrink: Shrink[String] = implicitly[Shrink[String]] + Shrink( + x => stringShrink.shrink(x.toString).map(CIString.apply) + ) } + implicit val cogenForOrgTypelevelCiCIString: Cogen[CIString] = Cogen[String].contramap(ci => new String(ci.toString.toArray.map(_.toLower))) + + implicit val arbCaseFoldedString: Arbitrary[CaseFoldedString] = + Arbitrary( + arbitrary[String].flatMap(value => + Gen.oneOf( + CaseFoldedString(value), + CaseFoldedString(value, true) // Turkic folding rules + ) + ) + ) + + implicit val cogenForCaseFoldedString: Cogen[CaseFoldedString] = + Cogen[String].contramap(_.toString) + + implicit val shrinkCaseFoldedString: Shrink[CaseFoldedString] = { + import scala.collection.immutable.Stream + val stringShrink: Shrink[String] = implicitly[Shrink[String]] + Shrink( + x => stringShrink.shrink(x.toString).flatMap(value => + Stream(CaseFoldedString(value), CaseFoldedString(value, true)) + ) + ) + } } diff --git a/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala b/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala new file mode 100644 index 0000000..6aa2018 --- /dev/null +++ b/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala @@ -0,0 +1,25 @@ +package 
org.typelevel.ci + +import cats.implicits._ +import java.io._ +import munit.ScalaCheckSuite +import org.typelevel.ci.testing.arbitraries._ +import org.scalacheck.Prop._ + +final class CaseFoldedStringJVMSuite extends ScalaCheckSuite { + property("serialization round trips") { + def roundTrip[A](x: A): A = { + val baos = new ByteArrayOutputStream + val oos = new ObjectOutputStream(baos) + oos.writeObject(x) + oos.close() + val bais = new ByteArrayInputStream(baos.toByteArray) + val ois = new ObjectInputStream(bais) + ois.readObject().asInstanceOf[A] + } + + forAll { (x: CaseFoldedString) => + x.eqv(roundTrip(x)) + } + } +} diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala index a28087e..fd8cae2 100644 --- a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala +++ b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala @@ -22,20 +22,31 @@ import munit.DisciplineSuite import org.scalacheck.Prop._ import org.typelevel.ci.testing.arbitraries._ import scala.math.signum +import scala.annotation.tailrec class CIStringSuite extends DisciplineSuite { property("case insensitive equality") { forAll { (x: CIString) => - val y = CIString(new String(x.toString.toArray.map(_.toUpper))) - val z = CIString(new String(x.toString.toArray.map(_.toLower))) - assertEquals(y, z) + if (x.toString.contains('\u0131')) { + // '\u0131' is LATIN SMALL LETTER DOTLESS I. Upper-casing it yields 'I', + // but the Unicode case folding rules only treat '\u0131' and 'I' as + // case insensitively equivalent for Turkic languages, and that mapping + // is not applied by default. The lower- and upper-cased forms must + // therefore NOT compare equal here.
+ val y = CIString(x.toString.toLowerCase()) + val z = CIString(x.toString.toUpperCase()) + assertNotEquals(y, z) + } else { + val y = CIString(x.toString.toLowerCase()) + val z = CIString(x.toString.toUpperCase()) + val t = CIString(CIStringSuite.toTitleCase(x.toString)) + assertEquals(y, z) + assertEquals(y, t) + assertEquals(t, z) + } } } - test("character based equality") { - assert(CIString("ß") != CIString("SS")) - } - property("reflexive equality") { forAll { (x: CIString) => assertEquals(x, x) @@ -178,6 +189,17 @@ class CIStringSuite extends DisciplineSuite { }) } + // Test name copied from java.lang.Character.getName(), I know it's long... + test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and lower case invocations"){ + val codePoint: Int = 8085 // U+1F95, code point of the lower case letter + val lower: String = (new String(Character.toChars(codePoint))).toLowerCase + val upper: String = lower.toUpperCase + val title: String = CIStringSuite.toTitleCase(lower) // maps by code point, not by UTF-16 Char + assertEquals(CIString(lower), CIString(upper)) + assertEquals(CIString(lower), CIString(title)) + assertEquals(CIString(title), CIString(upper)) + } + checkAll("Order[CIString]", OrderTests[CIString].order) checkAll("Hash[CIString]", HashTests[CIString].hash) checkAll("LowerBounded[CIString]", LowerBoundedTests[CIString].lowerBounded) @@ -187,3 +209,27 @@ class CIStringSuite extends DisciplineSuite { "CIString instances", SerializableTests.serializable(CIString.catsInstancesForOrgTypelevelCIString)) } + +object CIStringSuite { + def mapStringByCodepoint(f: Int => Int)(s: String): String = { + // Scala's wrapper class doesn't support appendCodePoint, so we need to + // explicitly use the java.lang.StringBuilder + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(s.length) + + @tailrec + def loop(index: Int): String = + if (index >= s.length) { + builder.toString + } else { + val codePoint: Int = s.codePointAt(index) +
builder.appendCodePoint(f(codePoint)) + val inc: Int = Character.charCount(codePoint) + loop(index + inc) + } + + loop(0) + } + + def toTitleCase(s: String): String = + mapStringByCodepoint(Character.toTitleCase)(s) +} diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala new file mode 100644 index 0000000..7d70aad --- /dev/null +++ b/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala @@ -0,0 +1,190 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.typelevel.ci + +import cats.implicits._ +import cats.kernel.laws.discipline._ +import munit.DisciplineSuite +import org.scalacheck.Prop._ +import org.typelevel.ci.testing.arbitraries._ +import scala.math.signum +import scala.annotation.tailrec + +final class CaseFoldedStringSuite extends DisciplineSuite { + property("case insensitive equality") { + forAll { (x: CaseFoldedString) => + if (x.toString.contains('\u0131')) { + // '\u0131' is LATIN SMALL LETTER DOTLESS I. Upper-casing it yields 'I', + // but the Unicode case folding rules only treat '\u0131' and 'I' as + // case insensitively equivalent for Turkic languages, and that mapping + // is not applied by default. The lower- and upper-cased forms must + // therefore NOT compare equal here.
+ val y = CaseFoldedString(x.toString.toLowerCase()) + val z = CaseFoldedString(x.toString.toUpperCase()) + assertNotEquals(y, z) + } else { + val y = CaseFoldedString(x.toString.toLowerCase()) + val z = CaseFoldedString(x.toString.toUpperCase()) + val t = CaseFoldedString(CaseFoldedStringSuite.toTitleCase(x.toString)) + assertEquals(y, z) + assertEquals(y, t) + assertEquals(t, z) + } + } + } + + test("case insensitive comparison") { + assert(CaseFoldedString("case-insensitive") < CaseFoldedString("CI")) + } + + property("reflexive comparison") { + forAll { (x: CaseFoldedString) => + assertEquals(x.compare(x), 0) + } + } + + property("equality consistent with comparison") { + forAll { (x: CaseFoldedString, y: CaseFoldedString) => + assertEquals((x == y), (x.compare(y) == 0)) + } + } + + property("hashCode consistent with equality") { + forAll { (x: CaseFoldedString, y: CaseFoldedString) => + assert((x != y) || (x.hashCode == y.hashCode)) + } + } + + test("isEmpty is true given an empty string") { + assert(CaseFoldedString("").isEmpty) + } + + test("isEmpty is false given a non-empty string") { + assert(!CaseFoldedString("non-empty string").isEmpty) + } + + property("is never equal to .nonEmpty for any given string") { + forAll { (ci: CaseFoldedString) => + assert(ci.isEmpty != ci.nonEmpty) + } + } + + test("nonEmpty is true given a non-empty string") { + assert(CaseFoldedString("non-empty string").nonEmpty) + } + + test("nonEmpty is false given an empty string") { + assert(!CaseFoldedString("").nonEmpty) + } + + test("trim removes leading whitespace") { + assert(CaseFoldedString(" text").trim == CaseFoldedString("text")) + } + + test("removes trailing whitespace") { + assert(CaseFoldedString("text ").trim == CaseFoldedString("text")) + } + + test("removes leading and trailing whitespace") { + assert(CaseFoldedString(" text ").trim == CaseFoldedString("text")) + } + + // property("ci interpolator is consistent with apply") { + // forAll { (s: String) => + // 
assertEquals(ci"$s", CaseFoldedString(s)) + // } + // } + + // property("ci interpolator handles expressions") { + // forAll { (x: Int, y: Int) => + // assertEquals(ci"${x + y}", CaseFoldedString((x + y).toString)) + // } + // } + + // property("ci interpolator handles multiple parts") { + // forAll { (a: String, b: String, c: String) => + // assertEquals(ci"$a:$b:$c", CaseFoldedString(s"$a:$b:$c")) + // } + // } + + // property("ci interpolator extractor is case-insensitive") { + // forAll { (s: String) => + // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toUpper))) match { + // case ci"${t}" => t == CaseFoldedString(s) + // case _ => false + // }) + + // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toLower))) match { + // case ci"${t}" => t == CaseFoldedString(s) + // case _ => false + // }) + // } + // } + + // test("ci interpolator extracts multiple parts") { + // assert(CaseFoldedString("Hello, Aretha") match { + // case ci"${greeting}, ${name}" => greeting == ci"Hello" && name == ci"Aretha" + // }) + // } + + // test("ci interpolator matches literals") { + // assert(CaseFoldedString("literally") match { + // case ci"LiTeRaLlY" => true + // case _ => false + // }) + // } + + // Test name copied from java.lang.Character.getName(), I know it's long... 
+ test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and lower case invocations"){ + val codePoint: Int = 8085 // U+1F95, code point of the lower case letter + val lower: String = (new String(Character.toChars(codePoint))).toLowerCase + val upper: String = lower.toUpperCase + val title: String = CaseFoldedStringSuite.toTitleCase(lower) // maps by code point, not by UTF-16 Char + assertEquals(CaseFoldedString(lower), CaseFoldedString(upper)) + assertEquals(CaseFoldedString(lower), CaseFoldedString(title)) + assertEquals(CaseFoldedString(title), CaseFoldedString(upper)) + } + + checkAll("Order[CaseFoldedString]", OrderTests[CaseFoldedString].order) + checkAll("Hash[CaseFoldedString]", HashTests[CaseFoldedString].hash) + checkAll("LowerBounded[CaseFoldedString]", LowerBoundedTests[CaseFoldedString].lowerBounded) + checkAll("Monoid[CaseFoldedString]", MonoidTests[CaseFoldedString].monoid) +} + +object CaseFoldedStringSuite { + def mapStringByCodepoint(f: Int => Int)(s: String): String = { + // Scala's wrapper class doesn't support appendCodePoint, so we need to + // explicitly use the java.lang.StringBuilder + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(s.length) + + @tailrec + def loop(index: Int): String = + if (index >= s.length) { + builder.toString + } else { + val codePoint: Int = s.codePointAt(index) + builder.appendCodePoint(f(codePoint)) + val inc: Int = Character.charCount(codePoint) + loop(index + inc) + } + + loop(0) + } + + def toTitleCase(s: String): String = + mapStringByCodepoint(Character.toTitleCase)(s) +} From f0bd02f183f45f14d4656613d72dabdfc26453a2 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sat, 5 Feb 2022 17:18:48 -0700 Subject: [PATCH 02/10] Fix Formatting Error --- core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala
b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala index 49c9d75..06348fc 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala @@ -27,7 +27,8 @@ import scala.annotation.tailrec * scala> val codePoint: Int = 8093 * val codePoint: Int = 8093 * - * scala> new String(Character.toChars(codePoint)) * val res0: String = ᾝ + * scala> new String(Character.toChars(codePoint)) + * val res0: String = ᾝ * * scala> res0.toUpperCase * val res1: String = ἭΙ From cfebcb04856fe2a5199a2335be198e363b5c7620 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sat, 5 Feb 2022 17:23:03 -0700 Subject: [PATCH 03/10] Fix Compare --- core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala index 06348fc..f5769ec 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala @@ -128,7 +128,7 @@ object CaseFoldedString { x.hashCode override def compare(x: CaseFoldedString, y: CaseFoldedString): Int = - x.compare(y) + x.toString.compare(y.toString) } implicit val orderingForCaseFoldedString: Ordering[CaseFoldedString] = From 9ca6c36648b094aa9bf0f61b1201d4514da57db6 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sat, 5 Feb 2022 17:24:13 -0700 Subject: [PATCH 04/10] Fix CIString Compare --- core/src/main/scala/org/typelevel/ci/CIString.scala | 2 +- .../src/main/scala/org/typelevel/ci/testing/arbitraries.scala | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/typelevel/ci/CIString.scala b/core/src/main/scala/org/typelevel/ci/CIString.scala index 6b9e339..af4ff7a 100644 --- a/core/src/main/scala/org/typelevel/ci/CIString.scala +++ b/core/src/main/scala/org/typelevel/ci/CIString.scala @@ -63,7 
+63,7 @@ final class CIString private (override val toString: String, val asCaseFoldedStr asCaseFoldedString.hashCode override def compare(that: CIString): Int = - asCaseFoldedString.compare(that.asCaseFoldedString) + Ordering[CaseFoldedString].compare(asCaseFoldedString, that.asCaseFoldedString) def transform(f: String => String): CIString = CIString(f(toString)) diff --git a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala index dfec161..288e9aa 100644 --- a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala +++ b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala @@ -18,8 +18,9 @@ package org.typelevel.ci package testing import java.util.Locale -import org.scalacheck.{Arbitrary, Cogen, Gen, Shrink} import org.scalacheck.Arbitrary.arbitrary +import org.scalacheck.{Arbitrary, Cogen, Gen, Shrink} +import scala.annotation.nowarn import scala.annotation.tailrec import scala.collection.immutable.BitSet @@ -94,6 +95,7 @@ object arbitraries { implicit val cogenForCaseFoldedString: Cogen[CaseFoldedString] = Cogen[String].contramap(_.toString) + @nowarn("cat=deprecation") implicit val shrinkCaseFoldedString: Shrink[CaseFoldedString] = { import scala.collection.immutable.Stream val stringShrink: Shrink[String] = implicitly[Shrink[String]] From 98f82e77fee154a3e45dca7a1eec1e4ddca1ca83 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 6 Feb 2022 07:23:43 -0700 Subject: [PATCH 05/10] Rename CaseFolds To CaseFolding --- core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala | 4 ++-- .../org/typelevel/ci/{CaseFolds.scala => CaseFolding.scala} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename core/src/main/scala/org/typelevel/ci/{CaseFolds.scala => CaseFolding.scala} (99%) diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala index f5769ec..00a980f 100644 --- 
a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala @@ -88,9 +88,9 @@ object CaseFoldedString { val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) val foldCodePoint: Int => Array[Int] = if (turkicFoldingRules) { - CaseFolds.turkicFullCaseFoldedCodePoints + CaseFolding.turkicFullCaseFoldedCodePoints } else { - CaseFolds.fullCaseFoldedCodePoints + CaseFolding.fullCaseFoldedCodePoints } @tailrec diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolds.scala b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala similarity index 99% rename from core/src/main/scala/org/typelevel/ci/CaseFolds.scala rename to core/src/main/scala/org/typelevel/ci/CaseFolding.scala index 66ce86d..d04bfb9 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFolds.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala @@ -3,7 +3,7 @@ package org.typelevel.ci import java.util.Locale import scala.annotation.tailrec -private[ci] object CaseFolds { +private[ci] object CaseFolding { def turkicFullCaseFoldedCodePoints(codePoint: Int): Array[Int] = codePoint match { From 6ed84338e328e011b589c22d2626c96748fc8033 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 6 Feb 2022 07:52:15 -0700 Subject: [PATCH 06/10] Add Simple Case Folding Tables --- .../scala/org/typelevel/ci/CaseFolding.scala | 2955 +++++++++-------- 1 file changed, 1525 insertions(+), 1430 deletions(-) diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala index d04bfb9..00a71de 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala @@ -1,10 +1,35 @@ package org.typelevel.ci -import java.util.Locale -import scala.annotation.tailrec - +/** These are lookup tables for case folding. 
There are several different case + * folding algorithms which can be employed with different trade-offs. + * + * @note Some case folding, in particular full case folding, can yield more + * codePoints than the original value. That is, it can ''increase'' the + * size of `String` values once folded. + * + * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790 Caseless Matching]] + * @see [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ private[ci] object CaseFolding { + // Note to library maintainers: These functions are intentionally written + // with int based case matching so that they will compile to a fast + // lookupswitch. Please keep this in mind when making changes. + // + // From `javap -v CaseFolding\$.class` on Scala 2.13. + // + // {{{ + // 3: lookupswitch { // 1530 + // 65: 12252 + // 66: 12263 + // }}} + + /** This function transforms a Unicode codePoint into its full case folded + * variant, with the rule changes which are applicable to ''some'' Turkic + * languages. + * + * For other languages these rules should not be applied. + */ def turkicFullCaseFoldedCodePoints(codePoint: Int): Array[Int] = codePoint match { case 0x0049 => Array(0x0131) // LATIN CAPITAL LETTER I @@ -13,1434 +38,27 @@ private[ci] object CaseFolding { fullCaseFoldedCodePoints(codePoint) } + /** This function transforms a Unicode codePoint into its simple case folded + * variant, with the rule changes which are applicable to ''some'' Turkic + * languages. + * + * For other languages these rules should not be applied. + */ + def turkicSimpleCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x0049 => 0x0131 // LATIN CAPITAL LETTER I + case 0x0130 => 0x0069 // LATIN CAPITAL LETTER I WITH DOT ABOVE + case _ => + simpleCaseFoldedCodePoints(codePoint) + } + + /** This function transforms a Unicode codePoint into its full case folded + * variant using the default rules.
+ * + * It is equivalent to the "C + F" rules from `CaseFolding.txt`. + */ def fullCaseFoldedCodePoints(codePoint: Int): Array[Int] = codePoint match { - case 0x0041 => Array(0x0061) // LATIN CAPITAL LETTER A - case 0x0042 => Array(0x0062) // LATIN CAPITAL LETTER B - case 0x0043 => Array(0x0063) // LATIN CAPITAL LETTER C - case 0x0044 => Array(0x0064) // LATIN CAPITAL LETTER D - case 0x0045 => Array(0x0065) // LATIN CAPITAL LETTER E - case 0x0046 => Array(0x0066) // LATIN CAPITAL LETTER F - case 0x0047 => Array(0x0067) // LATIN CAPITAL LETTER G - case 0x0048 => Array(0x0068) // LATIN CAPITAL LETTER H - case 0x0049 => Array(0x0069) // LATIN CAPITAL LETTER I - case 0x004A => Array(0x006A) // LATIN CAPITAL LETTER J - case 0x004B => Array(0x006B) // LATIN CAPITAL LETTER K - case 0x004C => Array(0x006C) // LATIN CAPITAL LETTER L - case 0x004D => Array(0x006D) // LATIN CAPITAL LETTER M - case 0x004E => Array(0x006E) // LATIN CAPITAL LETTER N - case 0x004F => Array(0x006F) // LATIN CAPITAL LETTER O - case 0x0050 => Array(0x0070) // LATIN CAPITAL LETTER P - case 0x0051 => Array(0x0071) // LATIN CAPITAL LETTER Q - case 0x0052 => Array(0x0072) // LATIN CAPITAL LETTER R - case 0x0053 => Array(0x0073) // LATIN CAPITAL LETTER S - case 0x0054 => Array(0x0074) // LATIN CAPITAL LETTER T - case 0x0055 => Array(0x0075) // LATIN CAPITAL LETTER U - case 0x0056 => Array(0x0076) // LATIN CAPITAL LETTER V - case 0x0057 => Array(0x0077) // LATIN CAPITAL LETTER W - case 0x0058 => Array(0x0078) // LATIN CAPITAL LETTER X - case 0x0059 => Array(0x0079) // LATIN CAPITAL LETTER Y - case 0x005A => Array(0x007A) // LATIN CAPITAL LETTER Z - case 0x00B5 => Array(0x03BC) // MICRO SIGN - case 0x00C0 => Array(0x00E0) // LATIN CAPITAL LETTER A WITH GRAVE - case 0x00C1 => Array(0x00E1) // LATIN CAPITAL LETTER A WITH ACUTE - case 0x00C2 => Array(0x00E2) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX - case 0x00C3 => Array(0x00E3) // LATIN CAPITAL LETTER A WITH TILDE - case 0x00C4 => Array(0x00E4) // LATIN 
CAPITAL LETTER A WITH DIAERESIS - case 0x00C5 => Array(0x00E5) // LATIN CAPITAL LETTER A WITH RING ABOVE - case 0x00C6 => Array(0x00E6) // LATIN CAPITAL LETTER AE - case 0x00C7 => Array(0x00E7) // LATIN CAPITAL LETTER C WITH CEDILLA - case 0x00C8 => Array(0x00E8) // LATIN CAPITAL LETTER E WITH GRAVE - case 0x00C9 => Array(0x00E9) // LATIN CAPITAL LETTER E WITH ACUTE - case 0x00CA => Array(0x00EA) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX - case 0x00CB => Array(0x00EB) // LATIN CAPITAL LETTER E WITH DIAERESIS - case 0x00CC => Array(0x00EC) // LATIN CAPITAL LETTER I WITH GRAVE - case 0x00CD => Array(0x00ED) // LATIN CAPITAL LETTER I WITH ACUTE - case 0x00CE => Array(0x00EE) // LATIN CAPITAL LETTER I WITH CIRCUMFLEX - case 0x00CF => Array(0x00EF) // LATIN CAPITAL LETTER I WITH DIAERESIS - case 0x00D0 => Array(0x00F0) // LATIN CAPITAL LETTER ETH - case 0x00D1 => Array(0x00F1) // LATIN CAPITAL LETTER N WITH TILDE - case 0x00D2 => Array(0x00F2) // LATIN CAPITAL LETTER O WITH GRAVE - case 0x00D3 => Array(0x00F3) // LATIN CAPITAL LETTER O WITH ACUTE - case 0x00D4 => Array(0x00F4) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX - case 0x00D5 => Array(0x00F5) // LATIN CAPITAL LETTER O WITH TILDE - case 0x00D6 => Array(0x00F6) // LATIN CAPITAL LETTER O WITH DIAERESIS - case 0x00D8 => Array(0x00F8) // LATIN CAPITAL LETTER O WITH STROKE - case 0x00D9 => Array(0x00F9) // LATIN CAPITAL LETTER U WITH GRAVE - case 0x00DA => Array(0x00FA) // LATIN CAPITAL LETTER U WITH ACUTE - case 0x00DB => Array(0x00FB) // LATIN CAPITAL LETTER U WITH CIRCUMFLEX - case 0x00DC => Array(0x00FC) // LATIN CAPITAL LETTER U WITH DIAERESIS - case 0x00DD => Array(0x00FD) // LATIN CAPITAL LETTER Y WITH ACUTE - case 0x00DE => Array(0x00FE) // LATIN CAPITAL LETTER THORN - case 0x0100 => Array(0x0101) // LATIN CAPITAL LETTER A WITH MACRON - case 0x0102 => Array(0x0103) // LATIN CAPITAL LETTER A WITH BREVE - case 0x0104 => Array(0x0105) // LATIN CAPITAL LETTER A WITH OGONEK - case 0x0106 => Array(0x0107) // LATIN 
CAPITAL LETTER C WITH ACUTE - case 0x0108 => Array(0x0109) // LATIN CAPITAL LETTER C WITH CIRCUMFLEX - case 0x010A => Array(0x010B) // LATIN CAPITAL LETTER C WITH DOT ABOVE - case 0x010C => Array(0x010D) // LATIN CAPITAL LETTER C WITH CARON - case 0x010E => Array(0x010F) // LATIN CAPITAL LETTER D WITH CARON - case 0x0110 => Array(0x0111) // LATIN CAPITAL LETTER D WITH STROKE - case 0x0112 => Array(0x0113) // LATIN CAPITAL LETTER E WITH MACRON - case 0x0114 => Array(0x0115) // LATIN CAPITAL LETTER E WITH BREVE - case 0x0116 => Array(0x0117) // LATIN CAPITAL LETTER E WITH DOT ABOVE - case 0x0118 => Array(0x0119) // LATIN CAPITAL LETTER E WITH OGONEK - case 0x011A => Array(0x011B) // LATIN CAPITAL LETTER E WITH CARON - case 0x011C => Array(0x011D) // LATIN CAPITAL LETTER G WITH CIRCUMFLEX - case 0x011E => Array(0x011F) // LATIN CAPITAL LETTER G WITH BREVE - case 0x0120 => Array(0x0121) // LATIN CAPITAL LETTER G WITH DOT ABOVE - case 0x0122 => Array(0x0123) // LATIN CAPITAL LETTER G WITH CEDILLA - case 0x0124 => Array(0x0125) // LATIN CAPITAL LETTER H WITH CIRCUMFLEX - case 0x0126 => Array(0x0127) // LATIN CAPITAL LETTER H WITH STROKE - case 0x0128 => Array(0x0129) // LATIN CAPITAL LETTER I WITH TILDE - case 0x012A => Array(0x012B) // LATIN CAPITAL LETTER I WITH MACRON - case 0x012C => Array(0x012D) // LATIN CAPITAL LETTER I WITH BREVE - case 0x012E => Array(0x012F) // LATIN CAPITAL LETTER I WITH OGONEK - case 0x0132 => Array(0x0133) // LATIN CAPITAL LIGATURE IJ - case 0x0134 => Array(0x0135) // LATIN CAPITAL LETTER J WITH CIRCUMFLEX - case 0x0136 => Array(0x0137) // LATIN CAPITAL LETTER K WITH CEDILLA - case 0x0139 => Array(0x013A) // LATIN CAPITAL LETTER L WITH ACUTE - case 0x013B => Array(0x013C) // LATIN CAPITAL LETTER L WITH CEDILLA - case 0x013D => Array(0x013E) // LATIN CAPITAL LETTER L WITH CARON - case 0x013F => Array(0x0140) // LATIN CAPITAL LETTER L WITH MIDDLE DOT - case 0x0141 => Array(0x0142) // LATIN CAPITAL LETTER L WITH STROKE - case 0x0143 => 
Array(0x0144) // LATIN CAPITAL LETTER N WITH ACUTE - case 0x0145 => Array(0x0146) // LATIN CAPITAL LETTER N WITH CEDILLA - case 0x0147 => Array(0x0148) // LATIN CAPITAL LETTER N WITH CARON - case 0x014A => Array(0x014B) // LATIN CAPITAL LETTER ENG - case 0x014C => Array(0x014D) // LATIN CAPITAL LETTER O WITH MACRON - case 0x014E => Array(0x014F) // LATIN CAPITAL LETTER O WITH BREVE - case 0x0150 => Array(0x0151) // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - case 0x0152 => Array(0x0153) // LATIN CAPITAL LIGATURE OE - case 0x0154 => Array(0x0155) // LATIN CAPITAL LETTER R WITH ACUTE - case 0x0156 => Array(0x0157) // LATIN CAPITAL LETTER R WITH CEDILLA - case 0x0158 => Array(0x0159) // LATIN CAPITAL LETTER R WITH CARON - case 0x015A => Array(0x015B) // LATIN CAPITAL LETTER S WITH ACUTE - case 0x015C => Array(0x015D) // LATIN CAPITAL LETTER S WITH CIRCUMFLEX - case 0x015E => Array(0x015F) // LATIN CAPITAL LETTER S WITH CEDILLA - case 0x0160 => Array(0x0161) // LATIN CAPITAL LETTER S WITH CARON - case 0x0162 => Array(0x0163) // LATIN CAPITAL LETTER T WITH CEDILLA - case 0x0164 => Array(0x0165) // LATIN CAPITAL LETTER T WITH CARON - case 0x0166 => Array(0x0167) // LATIN CAPITAL LETTER T WITH STROKE - case 0x0168 => Array(0x0169) // LATIN CAPITAL LETTER U WITH TILDE - case 0x016A => Array(0x016B) // LATIN CAPITAL LETTER U WITH MACRON - case 0x016C => Array(0x016D) // LATIN CAPITAL LETTER U WITH BREVE - case 0x016E => Array(0x016F) // LATIN CAPITAL LETTER U WITH RING ABOVE - case 0x0170 => Array(0x0171) // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - case 0x0172 => Array(0x0173) // LATIN CAPITAL LETTER U WITH OGONEK - case 0x0174 => Array(0x0175) // LATIN CAPITAL LETTER W WITH CIRCUMFLEX - case 0x0176 => Array(0x0177) // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - case 0x0178 => Array(0x00FF) // LATIN CAPITAL LETTER Y WITH DIAERESIS - case 0x0179 => Array(0x017A) // LATIN CAPITAL LETTER Z WITH ACUTE - case 0x017B => Array(0x017C) // LATIN CAPITAL LETTER Z WITH DOT ABOVE - case 
0x017D => Array(0x017E) // LATIN CAPITAL LETTER Z WITH CARON - case 0x017F => Array(0x0073) // LATIN SMALL LETTER LONG S - case 0x0181 => Array(0x0253) // LATIN CAPITAL LETTER B WITH HOOK - case 0x0182 => Array(0x0183) // LATIN CAPITAL LETTER B WITH TOPBAR - case 0x0184 => Array(0x0185) // LATIN CAPITAL LETTER TONE SIX - case 0x0186 => Array(0x0254) // LATIN CAPITAL LETTER OPEN O - case 0x0187 => Array(0x0188) // LATIN CAPITAL LETTER C WITH HOOK - case 0x0189 => Array(0x0256) // LATIN CAPITAL LETTER AFRICAN D - case 0x018A => Array(0x0257) // LATIN CAPITAL LETTER D WITH HOOK - case 0x018B => Array(0x018C) // LATIN CAPITAL LETTER D WITH TOPBAR - case 0x018E => Array(0x01DD) // LATIN CAPITAL LETTER REVERSED E - case 0x018F => Array(0x0259) // LATIN CAPITAL LETTER SCHWA - case 0x0190 => Array(0x025B) // LATIN CAPITAL LETTER OPEN E - case 0x0191 => Array(0x0192) // LATIN CAPITAL LETTER F WITH HOOK - case 0x0193 => Array(0x0260) // LATIN CAPITAL LETTER G WITH HOOK - case 0x0194 => Array(0x0263) // LATIN CAPITAL LETTER GAMMA - case 0x0196 => Array(0x0269) // LATIN CAPITAL LETTER IOTA - case 0x0197 => Array(0x0268) // LATIN CAPITAL LETTER I WITH STROKE - case 0x0198 => Array(0x0199) // LATIN CAPITAL LETTER K WITH HOOK - case 0x019C => Array(0x026F) // LATIN CAPITAL LETTER TURNED M - case 0x019D => Array(0x0272) // LATIN CAPITAL LETTER N WITH LEFT HOOK - case 0x019F => Array(0x0275) // LATIN CAPITAL LETTER O WITH MIDDLE TILDE - case 0x01A0 => Array(0x01A1) // LATIN CAPITAL LETTER O WITH HORN - case 0x01A2 => Array(0x01A3) // LATIN CAPITAL LETTER OI - case 0x01A4 => Array(0x01A5) // LATIN CAPITAL LETTER P WITH HOOK - case 0x01A6 => Array(0x0280) // LATIN LETTER YR - case 0x01A7 => Array(0x01A8) // LATIN CAPITAL LETTER TONE TWO - case 0x01A9 => Array(0x0283) // LATIN CAPITAL LETTER ESH - case 0x01AC => Array(0x01AD) // LATIN CAPITAL LETTER T WITH HOOK - case 0x01AE => Array(0x0288) // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK - case 0x01AF => Array(0x01B0) // LATIN CAPITAL 
LETTER U WITH HORN - case 0x01B1 => Array(0x028A) // LATIN CAPITAL LETTER UPSILON - case 0x01B2 => Array(0x028B) // LATIN CAPITAL LETTER V WITH HOOK - case 0x01B3 => Array(0x01B4) // LATIN CAPITAL LETTER Y WITH HOOK - case 0x01B5 => Array(0x01B6) // LATIN CAPITAL LETTER Z WITH STROKE - case 0x01B7 => Array(0x0292) // LATIN CAPITAL LETTER EZH - case 0x01B8 => Array(0x01B9) // LATIN CAPITAL LETTER EZH REVERSED - case 0x01BC => Array(0x01BD) // LATIN CAPITAL LETTER TONE FIVE - case 0x01C4 => Array(0x01C6) // LATIN CAPITAL LETTER DZ WITH CARON - case 0x01C5 => Array(0x01C6) // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON - case 0x01C7 => Array(0x01C9) // LATIN CAPITAL LETTER LJ - case 0x01C8 => Array(0x01C9) // LATIN CAPITAL LETTER L WITH SMALL LETTER J - case 0x01CA => Array(0x01CC) // LATIN CAPITAL LETTER NJ - case 0x01CB => Array(0x01CC) // LATIN CAPITAL LETTER N WITH SMALL LETTER J - case 0x01CD => Array(0x01CE) // LATIN CAPITAL LETTER A WITH CARON - case 0x01CF => Array(0x01D0) // LATIN CAPITAL LETTER I WITH CARON - case 0x01D1 => Array(0x01D2) // LATIN CAPITAL LETTER O WITH CARON - case 0x01D3 => Array(0x01D4) // LATIN CAPITAL LETTER U WITH CARON - case 0x01D5 => Array(0x01D6) // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON - case 0x01D7 => Array(0x01D8) // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE - case 0x01D9 => Array(0x01DA) // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON - case 0x01DB => Array(0x01DC) // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE - case 0x01DE => Array(0x01DF) // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON - case 0x01E0 => Array(0x01E1) // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON - case 0x01E2 => Array(0x01E3) // LATIN CAPITAL LETTER AE WITH MACRON - case 0x01E4 => Array(0x01E5) // LATIN CAPITAL LETTER G WITH STROKE - case 0x01E6 => Array(0x01E7) // LATIN CAPITAL LETTER G WITH CARON - case 0x01E8 => Array(0x01E9) // LATIN CAPITAL LETTER K WITH CARON - case 0x01EA => Array(0x01EB) // LATIN CAPITAL 
LETTER O WITH OGONEK - case 0x01EC => Array(0x01ED) // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON - case 0x01EE => Array(0x01EF) // LATIN CAPITAL LETTER EZH WITH CARON - case 0x01F1 => Array(0x01F3) // LATIN CAPITAL LETTER DZ - case 0x01F2 => Array(0x01F3) // LATIN CAPITAL LETTER D WITH SMALL LETTER Z - case 0x01F4 => Array(0x01F5) // LATIN CAPITAL LETTER G WITH ACUTE - case 0x01F6 => Array(0x0195) // LATIN CAPITAL LETTER HWAIR - case 0x01F7 => Array(0x01BF) // LATIN CAPITAL LETTER WYNN - case 0x01F8 => Array(0x01F9) // LATIN CAPITAL LETTER N WITH GRAVE - case 0x01FA => Array(0x01FB) // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE - case 0x01FC => Array(0x01FD) // LATIN CAPITAL LETTER AE WITH ACUTE - case 0x01FE => Array(0x01FF) // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE - case 0x0200 => Array(0x0201) // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE - case 0x0202 => Array(0x0203) // LATIN CAPITAL LETTER A WITH INVERTED BREVE - case 0x0204 => Array(0x0205) // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE - case 0x0206 => Array(0x0207) // LATIN CAPITAL LETTER E WITH INVERTED BREVE - case 0x0208 => Array(0x0209) // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE - case 0x020A => Array(0x020B) // LATIN CAPITAL LETTER I WITH INVERTED BREVE - case 0x020C => Array(0x020D) // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE - case 0x020E => Array(0x020F) // LATIN CAPITAL LETTER O WITH INVERTED BREVE - case 0x0210 => Array(0x0211) // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE - case 0x0212 => Array(0x0213) // LATIN CAPITAL LETTER R WITH INVERTED BREVE - case 0x0214 => Array(0x0215) // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE - case 0x0216 => Array(0x0217) // LATIN CAPITAL LETTER U WITH INVERTED BREVE - case 0x0218 => Array(0x0219) // LATIN CAPITAL LETTER S WITH COMMA BELOW - case 0x021A => Array(0x021B) // LATIN CAPITAL LETTER T WITH COMMA BELOW - case 0x021C => Array(0x021D) // LATIN CAPITAL LETTER YOGH - case 0x021E => Array(0x021F) // LATIN CAPITAL LETTER H WITH CARON - case 0x0220 => 
Array(0x019E) // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG - case 0x0222 => Array(0x0223) // LATIN CAPITAL LETTER OU - case 0x0224 => Array(0x0225) // LATIN CAPITAL LETTER Z WITH HOOK - case 0x0226 => Array(0x0227) // LATIN CAPITAL LETTER A WITH DOT ABOVE - case 0x0228 => Array(0x0229) // LATIN CAPITAL LETTER E WITH CEDILLA - case 0x022A => Array(0x022B) // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON - case 0x022C => Array(0x022D) // LATIN CAPITAL LETTER O WITH TILDE AND MACRON - case 0x022E => Array(0x022F) // LATIN CAPITAL LETTER O WITH DOT ABOVE - case 0x0230 => Array(0x0231) // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON - case 0x0232 => Array(0x0233) // LATIN CAPITAL LETTER Y WITH MACRON - case 0x023A => Array(0x2C65) // LATIN CAPITAL LETTER A WITH STROKE - case 0x023B => Array(0x023C) // LATIN CAPITAL LETTER C WITH STROKE - case 0x023D => Array(0x019A) // LATIN CAPITAL LETTER L WITH BAR - case 0x023E => Array(0x2C66) // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE - case 0x0241 => Array(0x0242) // LATIN CAPITAL LETTER GLOTTAL STOP - case 0x0243 => Array(0x0180) // LATIN CAPITAL LETTER B WITH STROKE - case 0x0244 => Array(0x0289) // LATIN CAPITAL LETTER U BAR - case 0x0245 => Array(0x028C) // LATIN CAPITAL LETTER TURNED V - case 0x0246 => Array(0x0247) // LATIN CAPITAL LETTER E WITH STROKE - case 0x0248 => Array(0x0249) // LATIN CAPITAL LETTER J WITH STROKE - case 0x024A => Array(0x024B) // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL - case 0x024C => Array(0x024D) // LATIN CAPITAL LETTER R WITH STROKE - case 0x024E => Array(0x024F) // LATIN CAPITAL LETTER Y WITH STROKE - case 0x0345 => Array(0x03B9) // COMBINING GREEK YPOGEGRAMMENI - case 0x0370 => Array(0x0371) // GREEK CAPITAL LETTER HETA - case 0x0372 => Array(0x0373) // GREEK CAPITAL LETTER ARCHAIC SAMPI - case 0x0376 => Array(0x0377) // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA - case 0x037F => Array(0x03F3) // GREEK CAPITAL LETTER YOT - case 0x0386 => Array(0x03AC) // GREEK CAPITAL LETTER ALPHA 
WITH TONOS - case 0x0388 => Array(0x03AD) // GREEK CAPITAL LETTER EPSILON WITH TONOS - case 0x0389 => Array(0x03AE) // GREEK CAPITAL LETTER ETA WITH TONOS - case 0x038A => Array(0x03AF) // GREEK CAPITAL LETTER IOTA WITH TONOS - case 0x038C => Array(0x03CC) // GREEK CAPITAL LETTER OMICRON WITH TONOS - case 0x038E => Array(0x03CD) // GREEK CAPITAL LETTER UPSILON WITH TONOS - case 0x038F => Array(0x03CE) // GREEK CAPITAL LETTER OMEGA WITH TONOS - case 0x0391 => Array(0x03B1) // GREEK CAPITAL LETTER ALPHA - case 0x0392 => Array(0x03B2) // GREEK CAPITAL LETTER BETA - case 0x0393 => Array(0x03B3) // GREEK CAPITAL LETTER GAMMA - case 0x0394 => Array(0x03B4) // GREEK CAPITAL LETTER DELTA - case 0x0395 => Array(0x03B5) // GREEK CAPITAL LETTER EPSILON - case 0x0396 => Array(0x03B6) // GREEK CAPITAL LETTER ZETA - case 0x0397 => Array(0x03B7) // GREEK CAPITAL LETTER ETA - case 0x0398 => Array(0x03B8) // GREEK CAPITAL LETTER THETA - case 0x0399 => Array(0x03B9) // GREEK CAPITAL LETTER IOTA - case 0x039A => Array(0x03BA) // GREEK CAPITAL LETTER KAPPA - case 0x039B => Array(0x03BB) // GREEK CAPITAL LETTER LAMDA - case 0x039C => Array(0x03BC) // GREEK CAPITAL LETTER MU - case 0x039D => Array(0x03BD) // GREEK CAPITAL LETTER NU - case 0x039E => Array(0x03BE) // GREEK CAPITAL LETTER XI - case 0x039F => Array(0x03BF) // GREEK CAPITAL LETTER OMICRON - case 0x03A0 => Array(0x03C0) // GREEK CAPITAL LETTER PI - case 0x03A1 => Array(0x03C1) // GREEK CAPITAL LETTER RHO - case 0x03A3 => Array(0x03C3) // GREEK CAPITAL LETTER SIGMA - case 0x03A4 => Array(0x03C4) // GREEK CAPITAL LETTER TAU - case 0x03A5 => Array(0x03C5) // GREEK CAPITAL LETTER UPSILON - case 0x03A6 => Array(0x03C6) // GREEK CAPITAL LETTER PHI - case 0x03A7 => Array(0x03C7) // GREEK CAPITAL LETTER CHI - case 0x03A8 => Array(0x03C8) // GREEK CAPITAL LETTER PSI - case 0x03A9 => Array(0x03C9) // GREEK CAPITAL LETTER OMEGA - case 0x03AA => Array(0x03CA) // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - case 0x03AB => Array(0x03CB) // 
GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - case 0x03C2 => Array(0x03C3) // GREEK SMALL LETTER FINAL SIGMA - case 0x03CF => Array(0x03D7) // GREEK CAPITAL KAI SYMBOL - case 0x03D0 => Array(0x03B2) // GREEK BETA SYMBOL - case 0x03D1 => Array(0x03B8) // GREEK THETA SYMBOL - case 0x03D5 => Array(0x03C6) // GREEK PHI SYMBOL - case 0x03D6 => Array(0x03C0) // GREEK PI SYMBOL - case 0x03D8 => Array(0x03D9) // GREEK LETTER ARCHAIC KOPPA - case 0x03DA => Array(0x03DB) // GREEK LETTER STIGMA - case 0x03DC => Array(0x03DD) // GREEK LETTER DIGAMMA - case 0x03DE => Array(0x03DF) // GREEK LETTER KOPPA - case 0x03E0 => Array(0x03E1) // GREEK LETTER SAMPI - case 0x03E2 => Array(0x03E3) // COPTIC CAPITAL LETTER SHEI - case 0x03E4 => Array(0x03E5) // COPTIC CAPITAL LETTER FEI - case 0x03E6 => Array(0x03E7) // COPTIC CAPITAL LETTER KHEI - case 0x03E8 => Array(0x03E9) // COPTIC CAPITAL LETTER HORI - case 0x03EA => Array(0x03EB) // COPTIC CAPITAL LETTER GANGIA - case 0x03EC => Array(0x03ED) // COPTIC CAPITAL LETTER SHIMA - case 0x03EE => Array(0x03EF) // COPTIC CAPITAL LETTER DEI - case 0x03F0 => Array(0x03BA) // GREEK KAPPA SYMBOL - case 0x03F1 => Array(0x03C1) // GREEK RHO SYMBOL - case 0x03F4 => Array(0x03B8) // GREEK CAPITAL THETA SYMBOL - case 0x03F5 => Array(0x03B5) // GREEK LUNATE EPSILON SYMBOL - case 0x03F7 => Array(0x03F8) // GREEK CAPITAL LETTER SHO - case 0x03F9 => Array(0x03F2) // GREEK CAPITAL LUNATE SIGMA SYMBOL - case 0x03FA => Array(0x03FB) // GREEK CAPITAL LETTER SAN - case 0x03FD => Array(0x037B) // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL - case 0x03FE => Array(0x037C) // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL - case 0x03FF => Array(0x037D) // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL - case 0x0400 => Array(0x0450) // CYRILLIC CAPITAL LETTER IE WITH GRAVE - case 0x0401 => Array(0x0451) // CYRILLIC CAPITAL LETTER IO - case 0x0402 => Array(0x0452) // CYRILLIC CAPITAL LETTER DJE - case 0x0403 => Array(0x0453) // CYRILLIC CAPITAL LETTER GJE - case 0x0404 
=> Array(0x0454) // CYRILLIC CAPITAL LETTER UKRAINIAN IE - case 0x0405 => Array(0x0455) // CYRILLIC CAPITAL LETTER DZE - case 0x0406 => Array(0x0456) // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - case 0x0407 => Array(0x0457) // CYRILLIC CAPITAL LETTER YI - case 0x0408 => Array(0x0458) // CYRILLIC CAPITAL LETTER JE - case 0x0409 => Array(0x0459) // CYRILLIC CAPITAL LETTER LJE - case 0x040A => Array(0x045A) // CYRILLIC CAPITAL LETTER NJE - case 0x040B => Array(0x045B) // CYRILLIC CAPITAL LETTER TSHE - case 0x040C => Array(0x045C) // CYRILLIC CAPITAL LETTER KJE - case 0x040D => Array(0x045D) // CYRILLIC CAPITAL LETTER I WITH GRAVE - case 0x040E => Array(0x045E) // CYRILLIC CAPITAL LETTER SHORT U - case 0x040F => Array(0x045F) // CYRILLIC CAPITAL LETTER DZHE - case 0x0410 => Array(0x0430) // CYRILLIC CAPITAL LETTER A - case 0x0411 => Array(0x0431) // CYRILLIC CAPITAL LETTER BE - case 0x0412 => Array(0x0432) // CYRILLIC CAPITAL LETTER VE - case 0x0413 => Array(0x0433) // CYRILLIC CAPITAL LETTER GHE - case 0x0414 => Array(0x0434) // CYRILLIC CAPITAL LETTER DE - case 0x0415 => Array(0x0435) // CYRILLIC CAPITAL LETTER IE - case 0x0416 => Array(0x0436) // CYRILLIC CAPITAL LETTER ZHE - case 0x0417 => Array(0x0437) // CYRILLIC CAPITAL LETTER ZE - case 0x0418 => Array(0x0438) // CYRILLIC CAPITAL LETTER I - case 0x0419 => Array(0x0439) // CYRILLIC CAPITAL LETTER SHORT I - case 0x041A => Array(0x043A) // CYRILLIC CAPITAL LETTER KA - case 0x041B => Array(0x043B) // CYRILLIC CAPITAL LETTER EL - case 0x041C => Array(0x043C) // CYRILLIC CAPITAL LETTER EM - case 0x041D => Array(0x043D) // CYRILLIC CAPITAL LETTER EN - case 0x041E => Array(0x043E) // CYRILLIC CAPITAL LETTER O - case 0x041F => Array(0x043F) // CYRILLIC CAPITAL LETTER PE - case 0x0420 => Array(0x0440) // CYRILLIC CAPITAL LETTER ER - case 0x0421 => Array(0x0441) // CYRILLIC CAPITAL LETTER ES - case 0x0422 => Array(0x0442) // CYRILLIC CAPITAL LETTER TE - case 0x0423 => Array(0x0443) // CYRILLIC CAPITAL LETTER U - 
case 0x0424 => Array(0x0444) // CYRILLIC CAPITAL LETTER EF - case 0x0425 => Array(0x0445) // CYRILLIC CAPITAL LETTER HA - case 0x0426 => Array(0x0446) // CYRILLIC CAPITAL LETTER TSE - case 0x0427 => Array(0x0447) // CYRILLIC CAPITAL LETTER CHE - case 0x0428 => Array(0x0448) // CYRILLIC CAPITAL LETTER SHA - case 0x0429 => Array(0x0449) // CYRILLIC CAPITAL LETTER SHCHA - case 0x042A => Array(0x044A) // CYRILLIC CAPITAL LETTER HARD SIGN - case 0x042B => Array(0x044B) // CYRILLIC CAPITAL LETTER YERU - case 0x042C => Array(0x044C) // CYRILLIC CAPITAL LETTER SOFT SIGN - case 0x042D => Array(0x044D) // CYRILLIC CAPITAL LETTER E - case 0x042E => Array(0x044E) // CYRILLIC CAPITAL LETTER YU - case 0x042F => Array(0x044F) // CYRILLIC CAPITAL LETTER YA - case 0x0460 => Array(0x0461) // CYRILLIC CAPITAL LETTER OMEGA - case 0x0462 => Array(0x0463) // CYRILLIC CAPITAL LETTER YAT - case 0x0464 => Array(0x0465) // CYRILLIC CAPITAL LETTER IOTIFIED E - case 0x0466 => Array(0x0467) // CYRILLIC CAPITAL LETTER LITTLE YUS - case 0x0468 => Array(0x0469) // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS - case 0x046A => Array(0x046B) // CYRILLIC CAPITAL LETTER BIG YUS - case 0x046C => Array(0x046D) // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS - case 0x046E => Array(0x046F) // CYRILLIC CAPITAL LETTER KSI - case 0x0470 => Array(0x0471) // CYRILLIC CAPITAL LETTER PSI - case 0x0472 => Array(0x0473) // CYRILLIC CAPITAL LETTER FITA - case 0x0474 => Array(0x0475) // CYRILLIC CAPITAL LETTER IZHITSA - case 0x0476 => Array(0x0477) // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT - case 0x0478 => Array(0x0479) // CYRILLIC CAPITAL LETTER UK - case 0x047A => Array(0x047B) // CYRILLIC CAPITAL LETTER ROUND OMEGA - case 0x047C => Array(0x047D) // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO - case 0x047E => Array(0x047F) // CYRILLIC CAPITAL LETTER OT - case 0x0480 => Array(0x0481) // CYRILLIC CAPITAL LETTER KOPPA - case 0x048A => Array(0x048B) // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL - case 0x048C 
=> Array(0x048D) // CYRILLIC CAPITAL LETTER SEMISOFT SIGN - case 0x048E => Array(0x048F) // CYRILLIC CAPITAL LETTER ER WITH TICK - case 0x0490 => Array(0x0491) // CYRILLIC CAPITAL LETTER GHE WITH UPTURN - case 0x0492 => Array(0x0493) // CYRILLIC CAPITAL LETTER GHE WITH STROKE - case 0x0494 => Array(0x0495) // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK - case 0x0496 => Array(0x0497) // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER - case 0x0498 => Array(0x0499) // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER - case 0x049A => Array(0x049B) // CYRILLIC CAPITAL LETTER KA WITH DESCENDER - case 0x049C => Array(0x049D) // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE - case 0x049E => Array(0x049F) // CYRILLIC CAPITAL LETTER KA WITH STROKE - case 0x04A0 => Array(0x04A1) // CYRILLIC CAPITAL LETTER BASHKIR KA - case 0x04A2 => Array(0x04A3) // CYRILLIC CAPITAL LETTER EN WITH DESCENDER - case 0x04A4 => Array(0x04A5) // CYRILLIC CAPITAL LIGATURE EN GHE - case 0x04A6 => Array(0x04A7) // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK - case 0x04A8 => Array(0x04A9) // CYRILLIC CAPITAL LETTER ABKHASIAN HA - case 0x04AA => Array(0x04AB) // CYRILLIC CAPITAL LETTER ES WITH DESCENDER - case 0x04AC => Array(0x04AD) // CYRILLIC CAPITAL LETTER TE WITH DESCENDER - case 0x04AE => Array(0x04AF) // CYRILLIC CAPITAL LETTER STRAIGHT U - case 0x04B0 => Array(0x04B1) // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE - case 0x04B2 => Array(0x04B3) // CYRILLIC CAPITAL LETTER HA WITH DESCENDER - case 0x04B4 => Array(0x04B5) // CYRILLIC CAPITAL LIGATURE TE TSE - case 0x04B6 => Array(0x04B7) // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER - case 0x04B8 => Array(0x04B9) // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE - case 0x04BA => Array(0x04BB) // CYRILLIC CAPITAL LETTER SHHA - case 0x04BC => Array(0x04BD) // CYRILLIC CAPITAL LETTER ABKHASIAN CHE - case 0x04BE => Array(0x04BF) // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER - case 0x04C0 => Array(0x04CF) // CYRILLIC LETTER PALOCHKA - case 
0x04C1 => Array(0x04C2) // CYRILLIC CAPITAL LETTER ZHE WITH BREVE - case 0x04C3 => Array(0x04C4) // CYRILLIC CAPITAL LETTER KA WITH HOOK - case 0x04C5 => Array(0x04C6) // CYRILLIC CAPITAL LETTER EL WITH TAIL - case 0x04C7 => Array(0x04C8) // CYRILLIC CAPITAL LETTER EN WITH HOOK - case 0x04C9 => Array(0x04CA) // CYRILLIC CAPITAL LETTER EN WITH TAIL - case 0x04CB => Array(0x04CC) // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE - case 0x04CD => Array(0x04CE) // CYRILLIC CAPITAL LETTER EM WITH TAIL - case 0x04D0 => Array(0x04D1) // CYRILLIC CAPITAL LETTER A WITH BREVE - case 0x04D2 => Array(0x04D3) // CYRILLIC CAPITAL LETTER A WITH DIAERESIS - case 0x04D4 => Array(0x04D5) // CYRILLIC CAPITAL LIGATURE A IE - case 0x04D6 => Array(0x04D7) // CYRILLIC CAPITAL LETTER IE WITH BREVE - case 0x04D8 => Array(0x04D9) // CYRILLIC CAPITAL LETTER SCHWA - case 0x04DA => Array(0x04DB) // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS - case 0x04DC => Array(0x04DD) // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS - case 0x04DE => Array(0x04DF) // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS - case 0x04E0 => Array(0x04E1) // CYRILLIC CAPITAL LETTER ABKHASIAN DZE - case 0x04E2 => Array(0x04E3) // CYRILLIC CAPITAL LETTER I WITH MACRON - case 0x04E4 => Array(0x04E5) // CYRILLIC CAPITAL LETTER I WITH DIAERESIS - case 0x04E6 => Array(0x04E7) // CYRILLIC CAPITAL LETTER O WITH DIAERESIS - case 0x04E8 => Array(0x04E9) // CYRILLIC CAPITAL LETTER BARRED O - case 0x04EA => Array(0x04EB) // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS - case 0x04EC => Array(0x04ED) // CYRILLIC CAPITAL LETTER E WITH DIAERESIS - case 0x04EE => Array(0x04EF) // CYRILLIC CAPITAL LETTER U WITH MACRON - case 0x04F0 => Array(0x04F1) // CYRILLIC CAPITAL LETTER U WITH DIAERESIS - case 0x04F2 => Array(0x04F3) // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE - case 0x04F4 => Array(0x04F5) // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS - case 0x04F6 => Array(0x04F7) // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER - case 0x04F8 => 
Array(0x04F9) // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS - case 0x04FA => Array(0x04FB) // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK - case 0x04FC => Array(0x04FD) // CYRILLIC CAPITAL LETTER HA WITH HOOK - case 0x04FE => Array(0x04FF) // CYRILLIC CAPITAL LETTER HA WITH STROKE - case 0x0500 => Array(0x0501) // CYRILLIC CAPITAL LETTER KOMI DE - case 0x0502 => Array(0x0503) // CYRILLIC CAPITAL LETTER KOMI DJE - case 0x0504 => Array(0x0505) // CYRILLIC CAPITAL LETTER KOMI ZJE - case 0x0506 => Array(0x0507) // CYRILLIC CAPITAL LETTER KOMI DZJE - case 0x0508 => Array(0x0509) // CYRILLIC CAPITAL LETTER KOMI LJE - case 0x050A => Array(0x050B) // CYRILLIC CAPITAL LETTER KOMI NJE - case 0x050C => Array(0x050D) // CYRILLIC CAPITAL LETTER KOMI SJE - case 0x050E => Array(0x050F) // CYRILLIC CAPITAL LETTER KOMI TJE - case 0x0510 => Array(0x0511) // CYRILLIC CAPITAL LETTER REVERSED ZE - case 0x0512 => Array(0x0513) // CYRILLIC CAPITAL LETTER EL WITH HOOK - case 0x0514 => Array(0x0515) // CYRILLIC CAPITAL LETTER LHA - case 0x0516 => Array(0x0517) // CYRILLIC CAPITAL LETTER RHA - case 0x0518 => Array(0x0519) // CYRILLIC CAPITAL LETTER YAE - case 0x051A => Array(0x051B) // CYRILLIC CAPITAL LETTER QA - case 0x051C => Array(0x051D) // CYRILLIC CAPITAL LETTER WE - case 0x051E => Array(0x051F) // CYRILLIC CAPITAL LETTER ALEUT KA - case 0x0520 => Array(0x0521) // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK - case 0x0522 => Array(0x0523) // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK - case 0x0524 => Array(0x0525) // CYRILLIC CAPITAL LETTER PE WITH DESCENDER - case 0x0526 => Array(0x0527) // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER - case 0x0528 => Array(0x0529) // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK - case 0x052A => Array(0x052B) // CYRILLIC CAPITAL LETTER DZZHE - case 0x052C => Array(0x052D) // CYRILLIC CAPITAL LETTER DCHE - case 0x052E => Array(0x052F) // CYRILLIC CAPITAL LETTER EL WITH DESCENDER - case 0x0531 => Array(0x0561) // ARMENIAN CAPITAL LETTER AYB - case 
0x0532 => Array(0x0562) // ARMENIAN CAPITAL LETTER BEN - case 0x0533 => Array(0x0563) // ARMENIAN CAPITAL LETTER GIM - case 0x0534 => Array(0x0564) // ARMENIAN CAPITAL LETTER DA - case 0x0535 => Array(0x0565) // ARMENIAN CAPITAL LETTER ECH - case 0x0536 => Array(0x0566) // ARMENIAN CAPITAL LETTER ZA - case 0x0537 => Array(0x0567) // ARMENIAN CAPITAL LETTER EH - case 0x0538 => Array(0x0568) // ARMENIAN CAPITAL LETTER ET - case 0x0539 => Array(0x0569) // ARMENIAN CAPITAL LETTER TO - case 0x053A => Array(0x056A) // ARMENIAN CAPITAL LETTER ZHE - case 0x053B => Array(0x056B) // ARMENIAN CAPITAL LETTER INI - case 0x053C => Array(0x056C) // ARMENIAN CAPITAL LETTER LIWN - case 0x053D => Array(0x056D) // ARMENIAN CAPITAL LETTER XEH - case 0x053E => Array(0x056E) // ARMENIAN CAPITAL LETTER CA - case 0x053F => Array(0x056F) // ARMENIAN CAPITAL LETTER KEN - case 0x0540 => Array(0x0570) // ARMENIAN CAPITAL LETTER HO - case 0x0541 => Array(0x0571) // ARMENIAN CAPITAL LETTER JA - case 0x0542 => Array(0x0572) // ARMENIAN CAPITAL LETTER GHAD - case 0x0543 => Array(0x0573) // ARMENIAN CAPITAL LETTER CHEH - case 0x0544 => Array(0x0574) // ARMENIAN CAPITAL LETTER MEN - case 0x0545 => Array(0x0575) // ARMENIAN CAPITAL LETTER YI - case 0x0546 => Array(0x0576) // ARMENIAN CAPITAL LETTER NOW - case 0x0547 => Array(0x0577) // ARMENIAN CAPITAL LETTER SHA - case 0x0548 => Array(0x0578) // ARMENIAN CAPITAL LETTER VO - case 0x0549 => Array(0x0579) // ARMENIAN CAPITAL LETTER CHA - case 0x054A => Array(0x057A) // ARMENIAN CAPITAL LETTER PEH - case 0x054B => Array(0x057B) // ARMENIAN CAPITAL LETTER JHEH - case 0x054C => Array(0x057C) // ARMENIAN CAPITAL LETTER RA - case 0x054D => Array(0x057D) // ARMENIAN CAPITAL LETTER SEH - case 0x054E => Array(0x057E) // ARMENIAN CAPITAL LETTER VEW - case 0x054F => Array(0x057F) // ARMENIAN CAPITAL LETTER TIWN - case 0x0550 => Array(0x0580) // ARMENIAN CAPITAL LETTER REH - case 0x0551 => Array(0x0581) // ARMENIAN CAPITAL LETTER CO - case 0x0552 => 
Array(0x0582) // ARMENIAN CAPITAL LETTER YIWN - case 0x0553 => Array(0x0583) // ARMENIAN CAPITAL LETTER PIWR - case 0x0554 => Array(0x0584) // ARMENIAN CAPITAL LETTER KEH - case 0x0555 => Array(0x0585) // ARMENIAN CAPITAL LETTER OH - case 0x0556 => Array(0x0586) // ARMENIAN CAPITAL LETTER FEH - case 0x10A0 => Array(0x2D00) // GEORGIAN CAPITAL LETTER AN - case 0x10A1 => Array(0x2D01) // GEORGIAN CAPITAL LETTER BAN - case 0x10A2 => Array(0x2D02) // GEORGIAN CAPITAL LETTER GAN - case 0x10A3 => Array(0x2D03) // GEORGIAN CAPITAL LETTER DON - case 0x10A4 => Array(0x2D04) // GEORGIAN CAPITAL LETTER EN - case 0x10A5 => Array(0x2D05) // GEORGIAN CAPITAL LETTER VIN - case 0x10A6 => Array(0x2D06) // GEORGIAN CAPITAL LETTER ZEN - case 0x10A7 => Array(0x2D07) // GEORGIAN CAPITAL LETTER TAN - case 0x10A8 => Array(0x2D08) // GEORGIAN CAPITAL LETTER IN - case 0x10A9 => Array(0x2D09) // GEORGIAN CAPITAL LETTER KAN - case 0x10AA => Array(0x2D0A) // GEORGIAN CAPITAL LETTER LAS - case 0x10AB => Array(0x2D0B) // GEORGIAN CAPITAL LETTER MAN - case 0x10AC => Array(0x2D0C) // GEORGIAN CAPITAL LETTER NAR - case 0x10AD => Array(0x2D0D) // GEORGIAN CAPITAL LETTER ON - case 0x10AE => Array(0x2D0E) // GEORGIAN CAPITAL LETTER PAR - case 0x10AF => Array(0x2D0F) // GEORGIAN CAPITAL LETTER ZHAR - case 0x10B0 => Array(0x2D10) // GEORGIAN CAPITAL LETTER RAE - case 0x10B1 => Array(0x2D11) // GEORGIAN CAPITAL LETTER SAN - case 0x10B2 => Array(0x2D12) // GEORGIAN CAPITAL LETTER TAR - case 0x10B3 => Array(0x2D13) // GEORGIAN CAPITAL LETTER UN - case 0x10B4 => Array(0x2D14) // GEORGIAN CAPITAL LETTER PHAR - case 0x10B5 => Array(0x2D15) // GEORGIAN CAPITAL LETTER KHAR - case 0x10B6 => Array(0x2D16) // GEORGIAN CAPITAL LETTER GHAN - case 0x10B7 => Array(0x2D17) // GEORGIAN CAPITAL LETTER QAR - case 0x10B8 => Array(0x2D18) // GEORGIAN CAPITAL LETTER SHIN - case 0x10B9 => Array(0x2D19) // GEORGIAN CAPITAL LETTER CHIN - case 0x10BA => Array(0x2D1A) // GEORGIAN CAPITAL LETTER CAN - case 0x10BB => Array(0x2D1B) 
// GEORGIAN CAPITAL LETTER JIL - case 0x10BC => Array(0x2D1C) // GEORGIAN CAPITAL LETTER CIL - case 0x10BD => Array(0x2D1D) // GEORGIAN CAPITAL LETTER CHAR - case 0x10BE => Array(0x2D1E) // GEORGIAN CAPITAL LETTER XAN - case 0x10BF => Array(0x2D1F) // GEORGIAN CAPITAL LETTER JHAN - case 0x10C0 => Array(0x2D20) // GEORGIAN CAPITAL LETTER HAE - case 0x10C1 => Array(0x2D21) // GEORGIAN CAPITAL LETTER HE - case 0x10C2 => Array(0x2D22) // GEORGIAN CAPITAL LETTER HIE - case 0x10C3 => Array(0x2D23) // GEORGIAN CAPITAL LETTER WE - case 0x10C4 => Array(0x2D24) // GEORGIAN CAPITAL LETTER HAR - case 0x10C5 => Array(0x2D25) // GEORGIAN CAPITAL LETTER HOE - case 0x10C7 => Array(0x2D27) // GEORGIAN CAPITAL LETTER YN - case 0x10CD => Array(0x2D2D) // GEORGIAN CAPITAL LETTER AEN - case 0x13F8 => Array(0x13F0) // CHEROKEE SMALL LETTER YE - case 0x13F9 => Array(0x13F1) // CHEROKEE SMALL LETTER YI - case 0x13FA => Array(0x13F2) // CHEROKEE SMALL LETTER YO - case 0x13FB => Array(0x13F3) // CHEROKEE SMALL LETTER YU - case 0x13FC => Array(0x13F4) // CHEROKEE SMALL LETTER YV - case 0x13FD => Array(0x13F5) // CHEROKEE SMALL LETTER MV - case 0x1C80 => Array(0x0432) // CYRILLIC SMALL LETTER ROUNDED VE - case 0x1C81 => Array(0x0434) // CYRILLIC SMALL LETTER LONG-LEGGED DE - case 0x1C82 => Array(0x043E) // CYRILLIC SMALL LETTER NARROW O - case 0x1C83 => Array(0x0441) // CYRILLIC SMALL LETTER WIDE ES - case 0x1C84 => Array(0x0442) // CYRILLIC SMALL LETTER TALL TE - case 0x1C85 => Array(0x0442) // CYRILLIC SMALL LETTER THREE-LEGGED TE - case 0x1C86 => Array(0x044A) // CYRILLIC SMALL LETTER TALL HARD SIGN - case 0x1C87 => Array(0x0463) // CYRILLIC SMALL LETTER TALL YAT - case 0x1C88 => Array(0xA64B) // CYRILLIC SMALL LETTER UNBLENDED UK - case 0x1C90 => Array(0x10D0) // GEORGIAN MTAVRULI CAPITAL LETTER AN - case 0x1C91 => Array(0x10D1) // GEORGIAN MTAVRULI CAPITAL LETTER BAN - case 0x1C92 => Array(0x10D2) // GEORGIAN MTAVRULI CAPITAL LETTER GAN - case 0x1C93 => Array(0x10D3) // GEORGIAN MTAVRULI 
CAPITAL LETTER DON - case 0x1C94 => Array(0x10D4) // GEORGIAN MTAVRULI CAPITAL LETTER EN - case 0x1C95 => Array(0x10D5) // GEORGIAN MTAVRULI CAPITAL LETTER VIN - case 0x1C96 => Array(0x10D6) // GEORGIAN MTAVRULI CAPITAL LETTER ZEN - case 0x1C97 => Array(0x10D7) // GEORGIAN MTAVRULI CAPITAL LETTER TAN - case 0x1C98 => Array(0x10D8) // GEORGIAN MTAVRULI CAPITAL LETTER IN - case 0x1C99 => Array(0x10D9) // GEORGIAN MTAVRULI CAPITAL LETTER KAN - case 0x1C9A => Array(0x10DA) // GEORGIAN MTAVRULI CAPITAL LETTER LAS - case 0x1C9B => Array(0x10DB) // GEORGIAN MTAVRULI CAPITAL LETTER MAN - case 0x1C9C => Array(0x10DC) // GEORGIAN MTAVRULI CAPITAL LETTER NAR - case 0x1C9D => Array(0x10DD) // GEORGIAN MTAVRULI CAPITAL LETTER ON - case 0x1C9E => Array(0x10DE) // GEORGIAN MTAVRULI CAPITAL LETTER PAR - case 0x1C9F => Array(0x10DF) // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR - case 0x1CA0 => Array(0x10E0) // GEORGIAN MTAVRULI CAPITAL LETTER RAE - case 0x1CA1 => Array(0x10E1) // GEORGIAN MTAVRULI CAPITAL LETTER SAN - case 0x1CA2 => Array(0x10E2) // GEORGIAN MTAVRULI CAPITAL LETTER TAR - case 0x1CA3 => Array(0x10E3) // GEORGIAN MTAVRULI CAPITAL LETTER UN - case 0x1CA4 => Array(0x10E4) // GEORGIAN MTAVRULI CAPITAL LETTER PHAR - case 0x1CA5 => Array(0x10E5) // GEORGIAN MTAVRULI CAPITAL LETTER KHAR - case 0x1CA6 => Array(0x10E6) // GEORGIAN MTAVRULI CAPITAL LETTER GHAN - case 0x1CA7 => Array(0x10E7) // GEORGIAN MTAVRULI CAPITAL LETTER QAR - case 0x1CA8 => Array(0x10E8) // GEORGIAN MTAVRULI CAPITAL LETTER SHIN - case 0x1CA9 => Array(0x10E9) // GEORGIAN MTAVRULI CAPITAL LETTER CHIN - case 0x1CAA => Array(0x10EA) // GEORGIAN MTAVRULI CAPITAL LETTER CAN - case 0x1CAB => Array(0x10EB) // GEORGIAN MTAVRULI CAPITAL LETTER JIL - case 0x1CAC => Array(0x10EC) // GEORGIAN MTAVRULI CAPITAL LETTER CIL - case 0x1CAD => Array(0x10ED) // GEORGIAN MTAVRULI CAPITAL LETTER CHAR - case 0x1CAE => Array(0x10EE) // GEORGIAN MTAVRULI CAPITAL LETTER XAN - case 0x1CAF => Array(0x10EF) // GEORGIAN MTAVRULI CAPITAL 
LETTER JHAN - case 0x1CB0 => Array(0x10F0) // GEORGIAN MTAVRULI CAPITAL LETTER HAE - case 0x1CB1 => Array(0x10F1) // GEORGIAN MTAVRULI CAPITAL LETTER HE - case 0x1CB2 => Array(0x10F2) // GEORGIAN MTAVRULI CAPITAL LETTER HIE - case 0x1CB3 => Array(0x10F3) // GEORGIAN MTAVRULI CAPITAL LETTER WE - case 0x1CB4 => Array(0x10F4) // GEORGIAN MTAVRULI CAPITAL LETTER HAR - case 0x1CB5 => Array(0x10F5) // GEORGIAN MTAVRULI CAPITAL LETTER HOE - case 0x1CB6 => Array(0x10F6) // GEORGIAN MTAVRULI CAPITAL LETTER FI - case 0x1CB7 => Array(0x10F7) // GEORGIAN MTAVRULI CAPITAL LETTER YN - case 0x1CB8 => Array(0x10F8) // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI - case 0x1CB9 => Array(0x10F9) // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN - case 0x1CBA => Array(0x10FA) // GEORGIAN MTAVRULI CAPITAL LETTER AIN - case 0x1CBD => Array(0x10FD) // GEORGIAN MTAVRULI CAPITAL LETTER AEN - case 0x1CBE => Array(0x10FE) // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN - case 0x1CBF => Array(0x10FF) // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN - case 0x1E00 => Array(0x1E01) // LATIN CAPITAL LETTER A WITH RING BELOW - case 0x1E02 => Array(0x1E03) // LATIN CAPITAL LETTER B WITH DOT ABOVE - case 0x1E04 => Array(0x1E05) // LATIN CAPITAL LETTER B WITH DOT BELOW - case 0x1E06 => Array(0x1E07) // LATIN CAPITAL LETTER B WITH LINE BELOW - case 0x1E08 => Array(0x1E09) // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE - case 0x1E0A => Array(0x1E0B) // LATIN CAPITAL LETTER D WITH DOT ABOVE - case 0x1E0C => Array(0x1E0D) // LATIN CAPITAL LETTER D WITH DOT BELOW - case 0x1E0E => Array(0x1E0F) // LATIN CAPITAL LETTER D WITH LINE BELOW - case 0x1E10 => Array(0x1E11) // LATIN CAPITAL LETTER D WITH CEDILLA - case 0x1E12 => Array(0x1E13) // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW - case 0x1E14 => Array(0x1E15) // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE - case 0x1E16 => Array(0x1E17) // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE - case 0x1E18 => Array(0x1E19) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW - 
case 0x1E1A => Array(0x1E1B) // LATIN CAPITAL LETTER E WITH TILDE BELOW - case 0x1E1C => Array(0x1E1D) // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE - case 0x1E1E => Array(0x1E1F) // LATIN CAPITAL LETTER F WITH DOT ABOVE - case 0x1E20 => Array(0x1E21) // LATIN CAPITAL LETTER G WITH MACRON - case 0x1E22 => Array(0x1E23) // LATIN CAPITAL LETTER H WITH DOT ABOVE - case 0x1E24 => Array(0x1E25) // LATIN CAPITAL LETTER H WITH DOT BELOW - case 0x1E26 => Array(0x1E27) // LATIN CAPITAL LETTER H WITH DIAERESIS - case 0x1E28 => Array(0x1E29) // LATIN CAPITAL LETTER H WITH CEDILLA - case 0x1E2A => Array(0x1E2B) // LATIN CAPITAL LETTER H WITH BREVE BELOW - case 0x1E2C => Array(0x1E2D) // LATIN CAPITAL LETTER I WITH TILDE BELOW - case 0x1E2E => Array(0x1E2F) // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE - case 0x1E30 => Array(0x1E31) // LATIN CAPITAL LETTER K WITH ACUTE - case 0x1E32 => Array(0x1E33) // LATIN CAPITAL LETTER K WITH DOT BELOW - case 0x1E34 => Array(0x1E35) // LATIN CAPITAL LETTER K WITH LINE BELOW - case 0x1E36 => Array(0x1E37) // LATIN CAPITAL LETTER L WITH DOT BELOW - case 0x1E38 => Array(0x1E39) // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON - case 0x1E3A => Array(0x1E3B) // LATIN CAPITAL LETTER L WITH LINE BELOW - case 0x1E3C => Array(0x1E3D) // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW - case 0x1E3E => Array(0x1E3F) // LATIN CAPITAL LETTER M WITH ACUTE - case 0x1E40 => Array(0x1E41) // LATIN CAPITAL LETTER M WITH DOT ABOVE - case 0x1E42 => Array(0x1E43) // LATIN CAPITAL LETTER M WITH DOT BELOW - case 0x1E44 => Array(0x1E45) // LATIN CAPITAL LETTER N WITH DOT ABOVE - case 0x1E46 => Array(0x1E47) // LATIN CAPITAL LETTER N WITH DOT BELOW - case 0x1E48 => Array(0x1E49) // LATIN CAPITAL LETTER N WITH LINE BELOW - case 0x1E4A => Array(0x1E4B) // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW - case 0x1E4C => Array(0x1E4D) // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE - case 0x1E4E => Array(0x1E4F) // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS - 
case 0x1E50 => Array(0x1E51) // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE - case 0x1E52 => Array(0x1E53) // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE - case 0x1E54 => Array(0x1E55) // LATIN CAPITAL LETTER P WITH ACUTE - case 0x1E56 => Array(0x1E57) // LATIN CAPITAL LETTER P WITH DOT ABOVE - case 0x1E58 => Array(0x1E59) // LATIN CAPITAL LETTER R WITH DOT ABOVE - case 0x1E5A => Array(0x1E5B) // LATIN CAPITAL LETTER R WITH DOT BELOW - case 0x1E5C => Array(0x1E5D) // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON - case 0x1E5E => Array(0x1E5F) // LATIN CAPITAL LETTER R WITH LINE BELOW - case 0x1E60 => Array(0x1E61) // LATIN CAPITAL LETTER S WITH DOT ABOVE - case 0x1E62 => Array(0x1E63) // LATIN CAPITAL LETTER S WITH DOT BELOW - case 0x1E64 => Array(0x1E65) // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE - case 0x1E66 => Array(0x1E67) // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE - case 0x1E68 => Array(0x1E69) // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE - case 0x1E6A => Array(0x1E6B) // LATIN CAPITAL LETTER T WITH DOT ABOVE - case 0x1E6C => Array(0x1E6D) // LATIN CAPITAL LETTER T WITH DOT BELOW - case 0x1E6E => Array(0x1E6F) // LATIN CAPITAL LETTER T WITH LINE BELOW - case 0x1E70 => Array(0x1E71) // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW - case 0x1E72 => Array(0x1E73) // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW - case 0x1E74 => Array(0x1E75) // LATIN CAPITAL LETTER U WITH TILDE BELOW - case 0x1E76 => Array(0x1E77) // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW - case 0x1E78 => Array(0x1E79) // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE - case 0x1E7A => Array(0x1E7B) // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS - case 0x1E7C => Array(0x1E7D) // LATIN CAPITAL LETTER V WITH TILDE - case 0x1E7E => Array(0x1E7F) // LATIN CAPITAL LETTER V WITH DOT BELOW - case 0x1E80 => Array(0x1E81) // LATIN CAPITAL LETTER W WITH GRAVE - case 0x1E82 => Array(0x1E83) // LATIN CAPITAL LETTER W WITH ACUTE - case 0x1E84 => Array(0x1E85) // LATIN 
CAPITAL LETTER W WITH DIAERESIS - case 0x1E86 => Array(0x1E87) // LATIN CAPITAL LETTER W WITH DOT ABOVE - case 0x1E88 => Array(0x1E89) // LATIN CAPITAL LETTER W WITH DOT BELOW - case 0x1E8A => Array(0x1E8B) // LATIN CAPITAL LETTER X WITH DOT ABOVE - case 0x1E8C => Array(0x1E8D) // LATIN CAPITAL LETTER X WITH DIAERESIS - case 0x1E8E => Array(0x1E8F) // LATIN CAPITAL LETTER Y WITH DOT ABOVE - case 0x1E90 => Array(0x1E91) // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX - case 0x1E92 => Array(0x1E93) // LATIN CAPITAL LETTER Z WITH DOT BELOW - case 0x1E94 => Array(0x1E95) // LATIN CAPITAL LETTER Z WITH LINE BELOW - case 0x1E9B => Array(0x1E61) // LATIN SMALL LETTER LONG S WITH DOT ABOVE - case 0x1EA0 => Array(0x1EA1) // LATIN CAPITAL LETTER A WITH DOT BELOW - case 0x1EA2 => Array(0x1EA3) // LATIN CAPITAL LETTER A WITH HOOK ABOVE - case 0x1EA4 => Array(0x1EA5) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE - case 0x1EA6 => Array(0x1EA7) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE - case 0x1EA8 => Array(0x1EA9) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1EAA => Array(0x1EAB) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE - case 0x1EAC => Array(0x1EAD) // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW - case 0x1EAE => Array(0x1EAF) // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE - case 0x1EB0 => Array(0x1EB1) // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE - case 0x1EB2 => Array(0x1EB3) // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE - case 0x1EB4 => Array(0x1EB5) // LATIN CAPITAL LETTER A WITH BREVE AND TILDE - case 0x1EB6 => Array(0x1EB7) // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW - case 0x1EB8 => Array(0x1EB9) // LATIN CAPITAL LETTER E WITH DOT BELOW - case 0x1EBA => Array(0x1EBB) // LATIN CAPITAL LETTER E WITH HOOK ABOVE - case 0x1EBC => Array(0x1EBD) // LATIN CAPITAL LETTER E WITH TILDE - case 0x1EBE => Array(0x1EBF) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE - case 0x1EC0 => Array(0x1EC1) // LATIN CAPITAL 
LETTER E WITH CIRCUMFLEX AND GRAVE - case 0x1EC2 => Array(0x1EC3) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1EC4 => Array(0x1EC5) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE - case 0x1EC6 => Array(0x1EC7) // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW - case 0x1EC8 => Array(0x1EC9) // LATIN CAPITAL LETTER I WITH HOOK ABOVE - case 0x1ECA => Array(0x1ECB) // LATIN CAPITAL LETTER I WITH DOT BELOW - case 0x1ECC => Array(0x1ECD) // LATIN CAPITAL LETTER O WITH DOT BELOW - case 0x1ECE => Array(0x1ECF) // LATIN CAPITAL LETTER O WITH HOOK ABOVE - case 0x1ED0 => Array(0x1ED1) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE - case 0x1ED2 => Array(0x1ED3) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE - case 0x1ED4 => Array(0x1ED5) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1ED6 => Array(0x1ED7) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE - case 0x1ED8 => Array(0x1ED9) // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW - case 0x1EDA => Array(0x1EDB) // LATIN CAPITAL LETTER O WITH HORN AND ACUTE - case 0x1EDC => Array(0x1EDD) // LATIN CAPITAL LETTER O WITH HORN AND GRAVE - case 0x1EDE => Array(0x1EDF) // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE - case 0x1EE0 => Array(0x1EE1) // LATIN CAPITAL LETTER O WITH HORN AND TILDE - case 0x1EE2 => Array(0x1EE3) // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW - case 0x1EE4 => Array(0x1EE5) // LATIN CAPITAL LETTER U WITH DOT BELOW - case 0x1EE6 => Array(0x1EE7) // LATIN CAPITAL LETTER U WITH HOOK ABOVE - case 0x1EE8 => Array(0x1EE9) // LATIN CAPITAL LETTER U WITH HORN AND ACUTE - case 0x1EEA => Array(0x1EEB) // LATIN CAPITAL LETTER U WITH HORN AND GRAVE - case 0x1EEC => Array(0x1EED) // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE - case 0x1EEE => Array(0x1EEF) // LATIN CAPITAL LETTER U WITH HORN AND TILDE - case 0x1EF0 => Array(0x1EF1) // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW - case 0x1EF2 => Array(0x1EF3) // LATIN CAPITAL LETTER Y 
WITH GRAVE - case 0x1EF4 => Array(0x1EF5) // LATIN CAPITAL LETTER Y WITH DOT BELOW - case 0x1EF6 => Array(0x1EF7) // LATIN CAPITAL LETTER Y WITH HOOK ABOVE - case 0x1EF8 => Array(0x1EF9) // LATIN CAPITAL LETTER Y WITH TILDE - case 0x1EFA => Array(0x1EFB) // LATIN CAPITAL LETTER MIDDLE-WELSH LL - case 0x1EFC => Array(0x1EFD) // LATIN CAPITAL LETTER MIDDLE-WELSH V - case 0x1EFE => Array(0x1EFF) // LATIN CAPITAL LETTER Y WITH LOOP - case 0x1F08 => Array(0x1F00) // GREEK CAPITAL LETTER ALPHA WITH PSILI - case 0x1F09 => Array(0x1F01) // GREEK CAPITAL LETTER ALPHA WITH DASIA - case 0x1F0A => Array(0x1F02) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA - case 0x1F0B => Array(0x1F03) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA - case 0x1F0C => Array(0x1F04) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA - case 0x1F0D => Array(0x1F05) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA - case 0x1F0E => Array(0x1F06) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI - case 0x1F0F => Array(0x1F07) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI - case 0x1F18 => Array(0x1F10) // GREEK CAPITAL LETTER EPSILON WITH PSILI - case 0x1F19 => Array(0x1F11) // GREEK CAPITAL LETTER EPSILON WITH DASIA - case 0x1F1A => Array(0x1F12) // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA - case 0x1F1B => Array(0x1F13) // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA - case 0x1F1C => Array(0x1F14) // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA - case 0x1F1D => Array(0x1F15) // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA - case 0x1F28 => Array(0x1F20) // GREEK CAPITAL LETTER ETA WITH PSILI - case 0x1F29 => Array(0x1F21) // GREEK CAPITAL LETTER ETA WITH DASIA - case 0x1F2A => Array(0x1F22) // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA - case 0x1F2B => Array(0x1F23) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA - case 0x1F2C => Array(0x1F24) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA - case 0x1F2D => Array(0x1F25) // GREEK CAPITAL LETTER ETA 
WITH DASIA AND OXIA - case 0x1F2E => Array(0x1F26) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI - case 0x1F2F => Array(0x1F27) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI - case 0x1F38 => Array(0x1F30) // GREEK CAPITAL LETTER IOTA WITH PSILI - case 0x1F39 => Array(0x1F31) // GREEK CAPITAL LETTER IOTA WITH DASIA - case 0x1F3A => Array(0x1F32) // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA - case 0x1F3B => Array(0x1F33) // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA - case 0x1F3C => Array(0x1F34) // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA - case 0x1F3D => Array(0x1F35) // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA - case 0x1F3E => Array(0x1F36) // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI - case 0x1F3F => Array(0x1F37) // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI - case 0x1F48 => Array(0x1F40) // GREEK CAPITAL LETTER OMICRON WITH PSILI - case 0x1F49 => Array(0x1F41) // GREEK CAPITAL LETTER OMICRON WITH DASIA - case 0x1F4A => Array(0x1F42) // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA - case 0x1F4B => Array(0x1F43) // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA - case 0x1F4C => Array(0x1F44) // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA - case 0x1F4D => Array(0x1F45) // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA - case 0x1F59 => Array(0x1F51) // GREEK CAPITAL LETTER UPSILON WITH DASIA - case 0x1F5B => Array(0x1F53) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA - case 0x1F5D => Array(0x1F55) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA - case 0x1F5F => Array(0x1F57) // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI - case 0x1F68 => Array(0x1F60) // GREEK CAPITAL LETTER OMEGA WITH PSILI - case 0x1F69 => Array(0x1F61) // GREEK CAPITAL LETTER OMEGA WITH DASIA - case 0x1F6A => Array(0x1F62) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA - case 0x1F6B => Array(0x1F63) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA - case 0x1F6C => Array(0x1F64) // GREEK CAPITAL 
LETTER OMEGA WITH PSILI AND OXIA - case 0x1F6D => Array(0x1F65) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA - case 0x1F6E => Array(0x1F66) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI - case 0x1F6F => Array(0x1F67) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI - case 0x1FB8 => Array(0x1FB0) // GREEK CAPITAL LETTER ALPHA WITH VRACHY - case 0x1FB9 => Array(0x1FB1) // GREEK CAPITAL LETTER ALPHA WITH MACRON - case 0x1FBA => Array(0x1F70) // GREEK CAPITAL LETTER ALPHA WITH VARIA - case 0x1FBB => Array(0x1F71) // GREEK CAPITAL LETTER ALPHA WITH OXIA - case 0x1FBE => Array(0x03B9) // GREEK PROSGEGRAMMENI - case 0x1FC8 => Array(0x1F72) // GREEK CAPITAL LETTER EPSILON WITH VARIA - case 0x1FC9 => Array(0x1F73) // GREEK CAPITAL LETTER EPSILON WITH OXIA - case 0x1FCA => Array(0x1F74) // GREEK CAPITAL LETTER ETA WITH VARIA - case 0x1FCB => Array(0x1F75) // GREEK CAPITAL LETTER ETA WITH OXIA - case 0x1FD8 => Array(0x1FD0) // GREEK CAPITAL LETTER IOTA WITH VRACHY - case 0x1FD9 => Array(0x1FD1) // GREEK CAPITAL LETTER IOTA WITH MACRON - case 0x1FDA => Array(0x1F76) // GREEK CAPITAL LETTER IOTA WITH VARIA - case 0x1FDB => Array(0x1F77) // GREEK CAPITAL LETTER IOTA WITH OXIA - case 0x1FE8 => Array(0x1FE0) // GREEK CAPITAL LETTER UPSILON WITH VRACHY - case 0x1FE9 => Array(0x1FE1) // GREEK CAPITAL LETTER UPSILON WITH MACRON - case 0x1FEA => Array(0x1F7A) // GREEK CAPITAL LETTER UPSILON WITH VARIA - case 0x1FEB => Array(0x1F7B) // GREEK CAPITAL LETTER UPSILON WITH OXIA - case 0x1FEC => Array(0x1FE5) // GREEK CAPITAL LETTER RHO WITH DASIA - case 0x1FF8 => Array(0x1F78) // GREEK CAPITAL LETTER OMICRON WITH VARIA - case 0x1FF9 => Array(0x1F79) // GREEK CAPITAL LETTER OMICRON WITH OXIA - case 0x1FFA => Array(0x1F7C) // GREEK CAPITAL LETTER OMEGA WITH VARIA - case 0x1FFB => Array(0x1F7D) // GREEK CAPITAL LETTER OMEGA WITH OXIA - case 0x2126 => Array(0x03C9) // OHM SIGN - case 0x212A => Array(0x006B) // KELVIN SIGN - case 0x212B => Array(0x00E5) // ANGSTROM SIGN 
- case 0x2132 => Array(0x214E) // TURNED CAPITAL F - case 0x2160 => Array(0x2170) // ROMAN NUMERAL ONE - case 0x2161 => Array(0x2171) // ROMAN NUMERAL TWO - case 0x2162 => Array(0x2172) // ROMAN NUMERAL THREE - case 0x2163 => Array(0x2173) // ROMAN NUMERAL FOUR - case 0x2164 => Array(0x2174) // ROMAN NUMERAL FIVE - case 0x2165 => Array(0x2175) // ROMAN NUMERAL SIX - case 0x2166 => Array(0x2176) // ROMAN NUMERAL SEVEN - case 0x2167 => Array(0x2177) // ROMAN NUMERAL EIGHT - case 0x2168 => Array(0x2178) // ROMAN NUMERAL NINE - case 0x2169 => Array(0x2179) // ROMAN NUMERAL TEN - case 0x216A => Array(0x217A) // ROMAN NUMERAL ELEVEN - case 0x216B => Array(0x217B) // ROMAN NUMERAL TWELVE - case 0x216C => Array(0x217C) // ROMAN NUMERAL FIFTY - case 0x216D => Array(0x217D) // ROMAN NUMERAL ONE HUNDRED - case 0x216E => Array(0x217E) // ROMAN NUMERAL FIVE HUNDRED - case 0x216F => Array(0x217F) // ROMAN NUMERAL ONE THOUSAND - case 0x2183 => Array(0x2184) // ROMAN NUMERAL REVERSED ONE HUNDRED - case 0x24B6 => Array(0x24D0) // CIRCLED LATIN CAPITAL LETTER A - case 0x24B7 => Array(0x24D1) // CIRCLED LATIN CAPITAL LETTER B - case 0x24B8 => Array(0x24D2) // CIRCLED LATIN CAPITAL LETTER C - case 0x24B9 => Array(0x24D3) // CIRCLED LATIN CAPITAL LETTER D - case 0x24BA => Array(0x24D4) // CIRCLED LATIN CAPITAL LETTER E - case 0x24BB => Array(0x24D5) // CIRCLED LATIN CAPITAL LETTER F - case 0x24BC => Array(0x24D6) // CIRCLED LATIN CAPITAL LETTER G - case 0x24BD => Array(0x24D7) // CIRCLED LATIN CAPITAL LETTER H - case 0x24BE => Array(0x24D8) // CIRCLED LATIN CAPITAL LETTER I - case 0x24BF => Array(0x24D9) // CIRCLED LATIN CAPITAL LETTER J - case 0x24C0 => Array(0x24DA) // CIRCLED LATIN CAPITAL LETTER K - case 0x24C1 => Array(0x24DB) // CIRCLED LATIN CAPITAL LETTER L - case 0x24C2 => Array(0x24DC) // CIRCLED LATIN CAPITAL LETTER M - case 0x24C3 => Array(0x24DD) // CIRCLED LATIN CAPITAL LETTER N - case 0x24C4 => Array(0x24DE) // CIRCLED LATIN CAPITAL LETTER O - case 0x24C5 => 
Array(0x24DF) // CIRCLED LATIN CAPITAL LETTER P - case 0x24C6 => Array(0x24E0) // CIRCLED LATIN CAPITAL LETTER Q - case 0x24C7 => Array(0x24E1) // CIRCLED LATIN CAPITAL LETTER R - case 0x24C8 => Array(0x24E2) // CIRCLED LATIN CAPITAL LETTER S - case 0x24C9 => Array(0x24E3) // CIRCLED LATIN CAPITAL LETTER T - case 0x24CA => Array(0x24E4) // CIRCLED LATIN CAPITAL LETTER U - case 0x24CB => Array(0x24E5) // CIRCLED LATIN CAPITAL LETTER V - case 0x24CC => Array(0x24E6) // CIRCLED LATIN CAPITAL LETTER W - case 0x24CD => Array(0x24E7) // CIRCLED LATIN CAPITAL LETTER X - case 0x24CE => Array(0x24E8) // CIRCLED LATIN CAPITAL LETTER Y - case 0x24CF => Array(0x24E9) // CIRCLED LATIN CAPITAL LETTER Z - case 0x2C00 => Array(0x2C30) // GLAGOLITIC CAPITAL LETTER AZU - case 0x2C01 => Array(0x2C31) // GLAGOLITIC CAPITAL LETTER BUKY - case 0x2C02 => Array(0x2C32) // GLAGOLITIC CAPITAL LETTER VEDE - case 0x2C03 => Array(0x2C33) // GLAGOLITIC CAPITAL LETTER GLAGOLI - case 0x2C04 => Array(0x2C34) // GLAGOLITIC CAPITAL LETTER DOBRO - case 0x2C05 => Array(0x2C35) // GLAGOLITIC CAPITAL LETTER YESTU - case 0x2C06 => Array(0x2C36) // GLAGOLITIC CAPITAL LETTER ZHIVETE - case 0x2C07 => Array(0x2C37) // GLAGOLITIC CAPITAL LETTER DZELO - case 0x2C08 => Array(0x2C38) // GLAGOLITIC CAPITAL LETTER ZEMLJA - case 0x2C09 => Array(0x2C39) // GLAGOLITIC CAPITAL LETTER IZHE - case 0x2C0A => Array(0x2C3A) // GLAGOLITIC CAPITAL LETTER INITIAL IZHE - case 0x2C0B => Array(0x2C3B) // GLAGOLITIC CAPITAL LETTER I - case 0x2C0C => Array(0x2C3C) // GLAGOLITIC CAPITAL LETTER DJERVI - case 0x2C0D => Array(0x2C3D) // GLAGOLITIC CAPITAL LETTER KAKO - case 0x2C0E => Array(0x2C3E) // GLAGOLITIC CAPITAL LETTER LJUDIJE - case 0x2C0F => Array(0x2C3F) // GLAGOLITIC CAPITAL LETTER MYSLITE - case 0x2C10 => Array(0x2C40) // GLAGOLITIC CAPITAL LETTER NASHI - case 0x2C11 => Array(0x2C41) // GLAGOLITIC CAPITAL LETTER ONU - case 0x2C12 => Array(0x2C42) // GLAGOLITIC CAPITAL LETTER POKOJI - case 0x2C13 => Array(0x2C43) // 
GLAGOLITIC CAPITAL LETTER RITSI - case 0x2C14 => Array(0x2C44) // GLAGOLITIC CAPITAL LETTER SLOVO - case 0x2C15 => Array(0x2C45) // GLAGOLITIC CAPITAL LETTER TVRIDO - case 0x2C16 => Array(0x2C46) // GLAGOLITIC CAPITAL LETTER UKU - case 0x2C17 => Array(0x2C47) // GLAGOLITIC CAPITAL LETTER FRITU - case 0x2C18 => Array(0x2C48) // GLAGOLITIC CAPITAL LETTER HERU - case 0x2C19 => Array(0x2C49) // GLAGOLITIC CAPITAL LETTER OTU - case 0x2C1A => Array(0x2C4A) // GLAGOLITIC CAPITAL LETTER PE - case 0x2C1B => Array(0x2C4B) // GLAGOLITIC CAPITAL LETTER SHTA - case 0x2C1C => Array(0x2C4C) // GLAGOLITIC CAPITAL LETTER TSI - case 0x2C1D => Array(0x2C4D) // GLAGOLITIC CAPITAL LETTER CHRIVI - case 0x2C1E => Array(0x2C4E) // GLAGOLITIC CAPITAL LETTER SHA - case 0x2C1F => Array(0x2C4F) // GLAGOLITIC CAPITAL LETTER YERU - case 0x2C20 => Array(0x2C50) // GLAGOLITIC CAPITAL LETTER YERI - case 0x2C21 => Array(0x2C51) // GLAGOLITIC CAPITAL LETTER YATI - case 0x2C22 => Array(0x2C52) // GLAGOLITIC CAPITAL LETTER SPIDERY HA - case 0x2C23 => Array(0x2C53) // GLAGOLITIC CAPITAL LETTER YU - case 0x2C24 => Array(0x2C54) // GLAGOLITIC CAPITAL LETTER SMALL YUS - case 0x2C25 => Array(0x2C55) // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL - case 0x2C26 => Array(0x2C56) // GLAGOLITIC CAPITAL LETTER YO - case 0x2C27 => Array(0x2C57) // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS - case 0x2C28 => Array(0x2C58) // GLAGOLITIC CAPITAL LETTER BIG YUS - case 0x2C29 => Array(0x2C59) // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS - case 0x2C2A => Array(0x2C5A) // GLAGOLITIC CAPITAL LETTER FITA - case 0x2C2B => Array(0x2C5B) // GLAGOLITIC CAPITAL LETTER IZHITSA - case 0x2C2C => Array(0x2C5C) // GLAGOLITIC CAPITAL LETTER SHTAPIC - case 0x2C2D => Array(0x2C5D) // GLAGOLITIC CAPITAL LETTER TROKUTASTI A - case 0x2C2E => Array(0x2C5E) // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE - case 0x2C2F => Array(0x2C5F) // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI - case 0x2C60 => Array(0x2C61) // LATIN CAPITAL LETTER L WITH 
DOUBLE BAR - case 0x2C62 => Array(0x026B) // LATIN CAPITAL LETTER L WITH MIDDLE TILDE - case 0x2C63 => Array(0x1D7D) // LATIN CAPITAL LETTER P WITH STROKE - case 0x2C64 => Array(0x027D) // LATIN CAPITAL LETTER R WITH TAIL - case 0x2C67 => Array(0x2C68) // LATIN CAPITAL LETTER H WITH DESCENDER - case 0x2C69 => Array(0x2C6A) // LATIN CAPITAL LETTER K WITH DESCENDER - case 0x2C6B => Array(0x2C6C) // LATIN CAPITAL LETTER Z WITH DESCENDER - case 0x2C6D => Array(0x0251) // LATIN CAPITAL LETTER ALPHA - case 0x2C6E => Array(0x0271) // LATIN CAPITAL LETTER M WITH HOOK - case 0x2C6F => Array(0x0250) // LATIN CAPITAL LETTER TURNED A - case 0x2C70 => Array(0x0252) // LATIN CAPITAL LETTER TURNED ALPHA - case 0x2C72 => Array(0x2C73) // LATIN CAPITAL LETTER W WITH HOOK - case 0x2C75 => Array(0x2C76) // LATIN CAPITAL LETTER HALF H - case 0x2C7E => Array(0x023F) // LATIN CAPITAL LETTER S WITH SWASH TAIL - case 0x2C7F => Array(0x0240) // LATIN CAPITAL LETTER Z WITH SWASH TAIL - case 0x2C80 => Array(0x2C81) // COPTIC CAPITAL LETTER ALFA - case 0x2C82 => Array(0x2C83) // COPTIC CAPITAL LETTER VIDA - case 0x2C84 => Array(0x2C85) // COPTIC CAPITAL LETTER GAMMA - case 0x2C86 => Array(0x2C87) // COPTIC CAPITAL LETTER DALDA - case 0x2C88 => Array(0x2C89) // COPTIC CAPITAL LETTER EIE - case 0x2C8A => Array(0x2C8B) // COPTIC CAPITAL LETTER SOU - case 0x2C8C => Array(0x2C8D) // COPTIC CAPITAL LETTER ZATA - case 0x2C8E => Array(0x2C8F) // COPTIC CAPITAL LETTER HATE - case 0x2C90 => Array(0x2C91) // COPTIC CAPITAL LETTER THETHE - case 0x2C92 => Array(0x2C93) // COPTIC CAPITAL LETTER IAUDA - case 0x2C94 => Array(0x2C95) // COPTIC CAPITAL LETTER KAPA - case 0x2C96 => Array(0x2C97) // COPTIC CAPITAL LETTER LAULA - case 0x2C98 => Array(0x2C99) // COPTIC CAPITAL LETTER MI - case 0x2C9A => Array(0x2C9B) // COPTIC CAPITAL LETTER NI - case 0x2C9C => Array(0x2C9D) // COPTIC CAPITAL LETTER KSI - case 0x2C9E => Array(0x2C9F) // COPTIC CAPITAL LETTER O - case 0x2CA0 => Array(0x2CA1) // COPTIC CAPITAL 
LETTER PI - case 0x2CA2 => Array(0x2CA3) // COPTIC CAPITAL LETTER RO - case 0x2CA4 => Array(0x2CA5) // COPTIC CAPITAL LETTER SIMA - case 0x2CA6 => Array(0x2CA7) // COPTIC CAPITAL LETTER TAU - case 0x2CA8 => Array(0x2CA9) // COPTIC CAPITAL LETTER UA - case 0x2CAA => Array(0x2CAB) // COPTIC CAPITAL LETTER FI - case 0x2CAC => Array(0x2CAD) // COPTIC CAPITAL LETTER KHI - case 0x2CAE => Array(0x2CAF) // COPTIC CAPITAL LETTER PSI - case 0x2CB0 => Array(0x2CB1) // COPTIC CAPITAL LETTER OOU - case 0x2CB2 => Array(0x2CB3) // COPTIC CAPITAL LETTER DIALECT-P ALEF - case 0x2CB4 => Array(0x2CB5) // COPTIC CAPITAL LETTER OLD COPTIC AIN - case 0x2CB6 => Array(0x2CB7) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE - case 0x2CB8 => Array(0x2CB9) // COPTIC CAPITAL LETTER DIALECT-P KAPA - case 0x2CBA => Array(0x2CBB) // COPTIC CAPITAL LETTER DIALECT-P NI - case 0x2CBC => Array(0x2CBD) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI - case 0x2CBE => Array(0x2CBF) // COPTIC CAPITAL LETTER OLD COPTIC OOU - case 0x2CC0 => Array(0x2CC1) // COPTIC CAPITAL LETTER SAMPI - case 0x2CC2 => Array(0x2CC3) // COPTIC CAPITAL LETTER CROSSED SHEI - case 0x2CC4 => Array(0x2CC5) // COPTIC CAPITAL LETTER OLD COPTIC SHEI - case 0x2CC6 => Array(0x2CC7) // COPTIC CAPITAL LETTER OLD COPTIC ESH - case 0x2CC8 => Array(0x2CC9) // COPTIC CAPITAL LETTER AKHMIMIC KHEI - case 0x2CCA => Array(0x2CCB) // COPTIC CAPITAL LETTER DIALECT-P HORI - case 0x2CCC => Array(0x2CCD) // COPTIC CAPITAL LETTER OLD COPTIC HORI - case 0x2CCE => Array(0x2CCF) // COPTIC CAPITAL LETTER OLD COPTIC HA - case 0x2CD0 => Array(0x2CD1) // COPTIC CAPITAL LETTER L-SHAPED HA - case 0x2CD2 => Array(0x2CD3) // COPTIC CAPITAL LETTER OLD COPTIC HEI - case 0x2CD4 => Array(0x2CD5) // COPTIC CAPITAL LETTER OLD COPTIC HAT - case 0x2CD6 => Array(0x2CD7) // COPTIC CAPITAL LETTER OLD COPTIC GANGIA - case 0x2CD8 => Array(0x2CD9) // COPTIC CAPITAL LETTER OLD COPTIC DJA - case 0x2CDA => Array(0x2CDB) // COPTIC CAPITAL LETTER OLD COPTIC SHIMA - case 0x2CDC => 
Array(0x2CDD) // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA - case 0x2CDE => Array(0x2CDF) // COPTIC CAPITAL LETTER OLD NUBIAN NGI - case 0x2CE0 => Array(0x2CE1) // COPTIC CAPITAL LETTER OLD NUBIAN NYI - case 0x2CE2 => Array(0x2CE3) // COPTIC CAPITAL LETTER OLD NUBIAN WAU - case 0x2CEB => Array(0x2CEC) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI - case 0x2CED => Array(0x2CEE) // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA - case 0x2CF2 => Array(0x2CF3) // COPTIC CAPITAL LETTER BOHAIRIC KHEI - case 0xA640 => Array(0xA641) // CYRILLIC CAPITAL LETTER ZEMLYA - case 0xA642 => Array(0xA643) // CYRILLIC CAPITAL LETTER DZELO - case 0xA644 => Array(0xA645) // CYRILLIC CAPITAL LETTER REVERSED DZE - case 0xA646 => Array(0xA647) // CYRILLIC CAPITAL LETTER IOTA - case 0xA648 => Array(0xA649) // CYRILLIC CAPITAL LETTER DJERV - case 0xA64A => Array(0xA64B) // CYRILLIC CAPITAL LETTER MONOGRAPH UK - case 0xA64C => Array(0xA64D) // CYRILLIC CAPITAL LETTER BROAD OMEGA - case 0xA64E => Array(0xA64F) // CYRILLIC CAPITAL LETTER NEUTRAL YER - case 0xA650 => Array(0xA651) // CYRILLIC CAPITAL LETTER YERU WITH BACK YER - case 0xA652 => Array(0xA653) // CYRILLIC CAPITAL LETTER IOTIFIED YAT - case 0xA654 => Array(0xA655) // CYRILLIC CAPITAL LETTER REVERSED YU - case 0xA656 => Array(0xA657) // CYRILLIC CAPITAL LETTER IOTIFIED A - case 0xA658 => Array(0xA659) // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS - case 0xA65A => Array(0xA65B) // CYRILLIC CAPITAL LETTER BLENDED YUS - case 0xA65C => Array(0xA65D) // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS - case 0xA65E => Array(0xA65F) // CYRILLIC CAPITAL LETTER YN - case 0xA660 => Array(0xA661) // CYRILLIC CAPITAL LETTER REVERSED TSE - case 0xA662 => Array(0xA663) // CYRILLIC CAPITAL LETTER SOFT DE - case 0xA664 => Array(0xA665) // CYRILLIC CAPITAL LETTER SOFT EL - case 0xA666 => Array(0xA667) // CYRILLIC CAPITAL LETTER SOFT EM - case 0xA668 => Array(0xA669) // CYRILLIC CAPITAL LETTER MONOCULAR O - case 0xA66A => Array(0xA66B) // CYRILLIC CAPITAL 
LETTER BINOCULAR O - case 0xA66C => Array(0xA66D) // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O - case 0xA680 => Array(0xA681) // CYRILLIC CAPITAL LETTER DWE - case 0xA682 => Array(0xA683) // CYRILLIC CAPITAL LETTER DZWE - case 0xA684 => Array(0xA685) // CYRILLIC CAPITAL LETTER ZHWE - case 0xA686 => Array(0xA687) // CYRILLIC CAPITAL LETTER CCHE - case 0xA688 => Array(0xA689) // CYRILLIC CAPITAL LETTER DZZE - case 0xA68A => Array(0xA68B) // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK - case 0xA68C => Array(0xA68D) // CYRILLIC CAPITAL LETTER TWE - case 0xA68E => Array(0xA68F) // CYRILLIC CAPITAL LETTER TSWE - case 0xA690 => Array(0xA691) // CYRILLIC CAPITAL LETTER TSSE - case 0xA692 => Array(0xA693) // CYRILLIC CAPITAL LETTER TCHE - case 0xA694 => Array(0xA695) // CYRILLIC CAPITAL LETTER HWE - case 0xA696 => Array(0xA697) // CYRILLIC CAPITAL LETTER SHWE - case 0xA698 => Array(0xA699) // CYRILLIC CAPITAL LETTER DOUBLE O - case 0xA69A => Array(0xA69B) // CYRILLIC CAPITAL LETTER CROSSED O - case 0xA722 => Array(0xA723) // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF - case 0xA724 => Array(0xA725) // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN - case 0xA726 => Array(0xA727) // LATIN CAPITAL LETTER HENG - case 0xA728 => Array(0xA729) // LATIN CAPITAL LETTER TZ - case 0xA72A => Array(0xA72B) // LATIN CAPITAL LETTER TRESILLO - case 0xA72C => Array(0xA72D) // LATIN CAPITAL LETTER CUATRILLO - case 0xA72E => Array(0xA72F) // LATIN CAPITAL LETTER CUATRILLO WITH COMMA - case 0xA732 => Array(0xA733) // LATIN CAPITAL LETTER AA - case 0xA734 => Array(0xA735) // LATIN CAPITAL LETTER AO - case 0xA736 => Array(0xA737) // LATIN CAPITAL LETTER AU - case 0xA738 => Array(0xA739) // LATIN CAPITAL LETTER AV - case 0xA73A => Array(0xA73B) // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR - case 0xA73C => Array(0xA73D) // LATIN CAPITAL LETTER AY - case 0xA73E => Array(0xA73F) // LATIN CAPITAL LETTER REVERSED C WITH DOT - case 0xA740 => Array(0xA741) // LATIN CAPITAL LETTER K WITH STROKE - case 0xA742 => 
Array(0xA743) // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE - case 0xA744 => Array(0xA745) // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE - case 0xA746 => Array(0xA747) // LATIN CAPITAL LETTER BROKEN L - case 0xA748 => Array(0xA749) // LATIN CAPITAL LETTER L WITH HIGH STROKE - case 0xA74A => Array(0xA74B) // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY - case 0xA74C => Array(0xA74D) // LATIN CAPITAL LETTER O WITH LOOP - case 0xA74E => Array(0xA74F) // LATIN CAPITAL LETTER OO - case 0xA750 => Array(0xA751) // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER - case 0xA752 => Array(0xA753) // LATIN CAPITAL LETTER P WITH FLOURISH - case 0xA754 => Array(0xA755) // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL - case 0xA756 => Array(0xA757) // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER - case 0xA758 => Array(0xA759) // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE - case 0xA75A => Array(0xA75B) // LATIN CAPITAL LETTER R ROTUNDA - case 0xA75C => Array(0xA75D) // LATIN CAPITAL LETTER RUM ROTUNDA - case 0xA75E => Array(0xA75F) // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE - case 0xA760 => Array(0xA761) // LATIN CAPITAL LETTER VY - case 0xA762 => Array(0xA763) // LATIN CAPITAL LETTER VISIGOTHIC Z - case 0xA764 => Array(0xA765) // LATIN CAPITAL LETTER THORN WITH STROKE - case 0xA766 => Array(0xA767) // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER - case 0xA768 => Array(0xA769) // LATIN CAPITAL LETTER VEND - case 0xA76A => Array(0xA76B) // LATIN CAPITAL LETTER ET - case 0xA76C => Array(0xA76D) // LATIN CAPITAL LETTER IS - case 0xA76E => Array(0xA76F) // LATIN CAPITAL LETTER CON - case 0xA779 => Array(0xA77A) // LATIN CAPITAL LETTER INSULAR D - case 0xA77B => Array(0xA77C) // LATIN CAPITAL LETTER INSULAR F - case 0xA77D => Array(0x1D79) // LATIN CAPITAL LETTER INSULAR G - case 0xA77E => Array(0xA77F) // LATIN CAPITAL LETTER TURNED INSULAR G - case 0xA780 => Array(0xA781) // LATIN CAPITAL LETTER TURNED L - case 0xA782 => Array(0xA783) // LATIN 
CAPITAL LETTER INSULAR R - case 0xA784 => Array(0xA785) // LATIN CAPITAL LETTER INSULAR S - case 0xA786 => Array(0xA787) // LATIN CAPITAL LETTER INSULAR T - case 0xA78B => Array(0xA78C) // LATIN CAPITAL LETTER SALTILLO - case 0xA78D => Array(0x0265) // LATIN CAPITAL LETTER TURNED H - case 0xA790 => Array(0xA791) // LATIN CAPITAL LETTER N WITH DESCENDER - case 0xA792 => Array(0xA793) // LATIN CAPITAL LETTER C WITH BAR - case 0xA796 => Array(0xA797) // LATIN CAPITAL LETTER B WITH FLOURISH - case 0xA798 => Array(0xA799) // LATIN CAPITAL LETTER F WITH STROKE - case 0xA79A => Array(0xA79B) // LATIN CAPITAL LETTER VOLAPUK AE - case 0xA79C => Array(0xA79D) // LATIN CAPITAL LETTER VOLAPUK OE - case 0xA79E => Array(0xA79F) // LATIN CAPITAL LETTER VOLAPUK UE - case 0xA7A0 => Array(0xA7A1) // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE - case 0xA7A2 => Array(0xA7A3) // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE - case 0xA7A4 => Array(0xA7A5) // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE - case 0xA7A6 => Array(0xA7A7) // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE - case 0xA7A8 => Array(0xA7A9) // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE - case 0xA7AA => Array(0x0266) // LATIN CAPITAL LETTER H WITH HOOK - case 0xA7AB => Array(0x025C) // LATIN CAPITAL LETTER REVERSED OPEN E - case 0xA7AC => Array(0x0261) // LATIN CAPITAL LETTER SCRIPT G - case 0xA7AD => Array(0x026C) // LATIN CAPITAL LETTER L WITH BELT - case 0xA7AE => Array(0x026A) // LATIN CAPITAL LETTER SMALL CAPITAL I - case 0xA7B0 => Array(0x029E) // LATIN CAPITAL LETTER TURNED K - case 0xA7B1 => Array(0x0287) // LATIN CAPITAL LETTER TURNED T - case 0xA7B2 => Array(0x029D) // LATIN CAPITAL LETTER J WITH CROSSED-TAIL - case 0xA7B3 => Array(0xAB53) // LATIN CAPITAL LETTER CHI - case 0xA7B4 => Array(0xA7B5) // LATIN CAPITAL LETTER BETA - case 0xA7B6 => Array(0xA7B7) // LATIN CAPITAL LETTER OMEGA - case 0xA7B8 => Array(0xA7B9) // LATIN CAPITAL LETTER U WITH STROKE - case 0xA7BA => Array(0xA7BB) // LATIN CAPITAL LETTER GLOTTAL 
A - case 0xA7BC => Array(0xA7BD) // LATIN CAPITAL LETTER GLOTTAL I - case 0xA7BE => Array(0xA7BF) // LATIN CAPITAL LETTER GLOTTAL U - case 0xA7C0 => Array(0xA7C1) // LATIN CAPITAL LETTER OLD POLISH O - case 0xA7C2 => Array(0xA7C3) // LATIN CAPITAL LETTER ANGLICANA W - case 0xA7C4 => Array(0xA794) // LATIN CAPITAL LETTER C WITH PALATAL HOOK - case 0xA7C5 => Array(0x0282) // LATIN CAPITAL LETTER S WITH HOOK - case 0xA7C6 => Array(0x1D8E) // LATIN CAPITAL LETTER Z WITH PALATAL HOOK - case 0xA7C7 => Array(0xA7C8) // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY - case 0xA7C9 => Array(0xA7CA) // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY - case 0xA7D0 => Array(0xA7D1) // LATIN CAPITAL LETTER CLOSED INSULAR G - case 0xA7D6 => Array(0xA7D7) // LATIN CAPITAL LETTER MIDDLE SCOTS S - case 0xA7D8 => Array(0xA7D9) // LATIN CAPITAL LETTER SIGMOID S - case 0xA7F5 => Array(0xA7F6) // LATIN CAPITAL LETTER REVERSED HALF H - case 0xAB70 => Array(0x13A0) // CHEROKEE SMALL LETTER A - case 0xAB71 => Array(0x13A1) // CHEROKEE SMALL LETTER E - case 0xAB72 => Array(0x13A2) // CHEROKEE SMALL LETTER I - case 0xAB73 => Array(0x13A3) // CHEROKEE SMALL LETTER O - case 0xAB74 => Array(0x13A4) // CHEROKEE SMALL LETTER U - case 0xAB75 => Array(0x13A5) // CHEROKEE SMALL LETTER V - case 0xAB76 => Array(0x13A6) // CHEROKEE SMALL LETTER GA - case 0xAB77 => Array(0x13A7) // CHEROKEE SMALL LETTER KA - case 0xAB78 => Array(0x13A8) // CHEROKEE SMALL LETTER GE - case 0xAB79 => Array(0x13A9) // CHEROKEE SMALL LETTER GI - case 0xAB7A => Array(0x13AA) // CHEROKEE SMALL LETTER GO - case 0xAB7B => Array(0x13AB) // CHEROKEE SMALL LETTER GU - case 0xAB7C => Array(0x13AC) // CHEROKEE SMALL LETTER GV - case 0xAB7D => Array(0x13AD) // CHEROKEE SMALL LETTER HA - case 0xAB7E => Array(0x13AE) // CHEROKEE SMALL LETTER HE - case 0xAB7F => Array(0x13AF) // CHEROKEE SMALL LETTER HI - case 0xAB80 => Array(0x13B0) // CHEROKEE SMALL LETTER HO - case 0xAB81 => Array(0x13B1) // CHEROKEE SMALL LETTER HU - case 0xAB82 
=> Array(0x13B2) // CHEROKEE SMALL LETTER HV - case 0xAB83 => Array(0x13B3) // CHEROKEE SMALL LETTER LA - case 0xAB84 => Array(0x13B4) // CHEROKEE SMALL LETTER LE - case 0xAB85 => Array(0x13B5) // CHEROKEE SMALL LETTER LI - case 0xAB86 => Array(0x13B6) // CHEROKEE SMALL LETTER LO - case 0xAB87 => Array(0x13B7) // CHEROKEE SMALL LETTER LU - case 0xAB88 => Array(0x13B8) // CHEROKEE SMALL LETTER LV - case 0xAB89 => Array(0x13B9) // CHEROKEE SMALL LETTER MA - case 0xAB8A => Array(0x13BA) // CHEROKEE SMALL LETTER ME - case 0xAB8B => Array(0x13BB) // CHEROKEE SMALL LETTER MI - case 0xAB8C => Array(0x13BC) // CHEROKEE SMALL LETTER MO - case 0xAB8D => Array(0x13BD) // CHEROKEE SMALL LETTER MU - case 0xAB8E => Array(0x13BE) // CHEROKEE SMALL LETTER NA - case 0xAB8F => Array(0x13BF) // CHEROKEE SMALL LETTER HNA - case 0xAB90 => Array(0x13C0) // CHEROKEE SMALL LETTER NAH - case 0xAB91 => Array(0x13C1) // CHEROKEE SMALL LETTER NE - case 0xAB92 => Array(0x13C2) // CHEROKEE SMALL LETTER NI - case 0xAB93 => Array(0x13C3) // CHEROKEE SMALL LETTER NO - case 0xAB94 => Array(0x13C4) // CHEROKEE SMALL LETTER NU - case 0xAB95 => Array(0x13C5) // CHEROKEE SMALL LETTER NV - case 0xAB96 => Array(0x13C6) // CHEROKEE SMALL LETTER QUA - case 0xAB97 => Array(0x13C7) // CHEROKEE SMALL LETTER QUE - case 0xAB98 => Array(0x13C8) // CHEROKEE SMALL LETTER QUI - case 0xAB99 => Array(0x13C9) // CHEROKEE SMALL LETTER QUO - case 0xAB9A => Array(0x13CA) // CHEROKEE SMALL LETTER QUU - case 0xAB9B => Array(0x13CB) // CHEROKEE SMALL LETTER QUV - case 0xAB9C => Array(0x13CC) // CHEROKEE SMALL LETTER SA - case 0xAB9D => Array(0x13CD) // CHEROKEE SMALL LETTER S - case 0xAB9E => Array(0x13CE) // CHEROKEE SMALL LETTER SE - case 0xAB9F => Array(0x13CF) // CHEROKEE SMALL LETTER SI - case 0xABA0 => Array(0x13D0) // CHEROKEE SMALL LETTER SO - case 0xABA1 => Array(0x13D1) // CHEROKEE SMALL LETTER SU - case 0xABA2 => Array(0x13D2) // CHEROKEE SMALL LETTER SV - case 0xABA3 => Array(0x13D3) // CHEROKEE SMALL LETTER DA 
- case 0xABA4 => Array(0x13D4) // CHEROKEE SMALL LETTER TA - case 0xABA5 => Array(0x13D5) // CHEROKEE SMALL LETTER DE - case 0xABA6 => Array(0x13D6) // CHEROKEE SMALL LETTER TE - case 0xABA7 => Array(0x13D7) // CHEROKEE SMALL LETTER DI - case 0xABA8 => Array(0x13D8) // CHEROKEE SMALL LETTER TI - case 0xABA9 => Array(0x13D9) // CHEROKEE SMALL LETTER DO - case 0xABAA => Array(0x13DA) // CHEROKEE SMALL LETTER DU - case 0xABAB => Array(0x13DB) // CHEROKEE SMALL LETTER DV - case 0xABAC => Array(0x13DC) // CHEROKEE SMALL LETTER DLA - case 0xABAD => Array(0x13DD) // CHEROKEE SMALL LETTER TLA - case 0xABAE => Array(0x13DE) // CHEROKEE SMALL LETTER TLE - case 0xABAF => Array(0x13DF) // CHEROKEE SMALL LETTER TLI - case 0xABB0 => Array(0x13E0) // CHEROKEE SMALL LETTER TLO - case 0xABB1 => Array(0x13E1) // CHEROKEE SMALL LETTER TLU - case 0xABB2 => Array(0x13E2) // CHEROKEE SMALL LETTER TLV - case 0xABB3 => Array(0x13E3) // CHEROKEE SMALL LETTER TSA - case 0xABB4 => Array(0x13E4) // CHEROKEE SMALL LETTER TSE - case 0xABB5 => Array(0x13E5) // CHEROKEE SMALL LETTER TSI - case 0xABB6 => Array(0x13E6) // CHEROKEE SMALL LETTER TSO - case 0xABB7 => Array(0x13E7) // CHEROKEE SMALL LETTER TSU - case 0xABB8 => Array(0x13E8) // CHEROKEE SMALL LETTER TSV - case 0xABB9 => Array(0x13E9) // CHEROKEE SMALL LETTER WA - case 0xABBA => Array(0x13EA) // CHEROKEE SMALL LETTER WE - case 0xABBB => Array(0x13EB) // CHEROKEE SMALL LETTER WI - case 0xABBC => Array(0x13EC) // CHEROKEE SMALL LETTER WO - case 0xABBD => Array(0x13ED) // CHEROKEE SMALL LETTER WU - case 0xABBE => Array(0x13EE) // CHEROKEE SMALL LETTER WV - case 0xABBF => Array(0x13EF) // CHEROKEE SMALL LETTER YA - case 0xFF21 => Array(0xFF41) // FULLWIDTH LATIN CAPITAL LETTER A - case 0xFF22 => Array(0xFF42) // FULLWIDTH LATIN CAPITAL LETTER B - case 0xFF23 => Array(0xFF43) // FULLWIDTH LATIN CAPITAL LETTER C - case 0xFF24 => Array(0xFF44) // FULLWIDTH LATIN CAPITAL LETTER D - case 0xFF25 => Array(0xFF45) // FULLWIDTH LATIN CAPITAL LETTER E 
- case 0xFF26 => Array(0xFF46) // FULLWIDTH LATIN CAPITAL LETTER F - case 0xFF27 => Array(0xFF47) // FULLWIDTH LATIN CAPITAL LETTER G - case 0xFF28 => Array(0xFF48) // FULLWIDTH LATIN CAPITAL LETTER H - case 0xFF29 => Array(0xFF49) // FULLWIDTH LATIN CAPITAL LETTER I - case 0xFF2A => Array(0xFF4A) // FULLWIDTH LATIN CAPITAL LETTER J - case 0xFF2B => Array(0xFF4B) // FULLWIDTH LATIN CAPITAL LETTER K - case 0xFF2C => Array(0xFF4C) // FULLWIDTH LATIN CAPITAL LETTER L - case 0xFF2D => Array(0xFF4D) // FULLWIDTH LATIN CAPITAL LETTER M - case 0xFF2E => Array(0xFF4E) // FULLWIDTH LATIN CAPITAL LETTER N - case 0xFF2F => Array(0xFF4F) // FULLWIDTH LATIN CAPITAL LETTER O - case 0xFF30 => Array(0xFF50) // FULLWIDTH LATIN CAPITAL LETTER P - case 0xFF31 => Array(0xFF51) // FULLWIDTH LATIN CAPITAL LETTER Q - case 0xFF32 => Array(0xFF52) // FULLWIDTH LATIN CAPITAL LETTER R - case 0xFF33 => Array(0xFF53) // FULLWIDTH LATIN CAPITAL LETTER S - case 0xFF34 => Array(0xFF54) // FULLWIDTH LATIN CAPITAL LETTER T - case 0xFF35 => Array(0xFF55) // FULLWIDTH LATIN CAPITAL LETTER U - case 0xFF36 => Array(0xFF56) // FULLWIDTH LATIN CAPITAL LETTER V - case 0xFF37 => Array(0xFF57) // FULLWIDTH LATIN CAPITAL LETTER W - case 0xFF38 => Array(0xFF58) // FULLWIDTH LATIN CAPITAL LETTER X - case 0xFF39 => Array(0xFF59) // FULLWIDTH LATIN CAPITAL LETTER Y - case 0xFF3A => Array(0xFF5A) // FULLWIDTH LATIN CAPITAL LETTER Z - case 0x10400 => Array(0x10428) // DESERET CAPITAL LETTER LONG I - case 0x10401 => Array(0x10429) // DESERET CAPITAL LETTER LONG E - case 0x10402 => Array(0x1042A) // DESERET CAPITAL LETTER LONG A - case 0x10403 => Array(0x1042B) // DESERET CAPITAL LETTER LONG AH - case 0x10404 => Array(0x1042C) // DESERET CAPITAL LETTER LONG O - case 0x10405 => Array(0x1042D) // DESERET CAPITAL LETTER LONG OO - case 0x10406 => Array(0x1042E) // DESERET CAPITAL LETTER SHORT I - case 0x10407 => Array(0x1042F) // DESERET CAPITAL LETTER SHORT E - case 0x10408 => Array(0x10430) // DESERET CAPITAL LETTER 
SHORT A - case 0x10409 => Array(0x10431) // DESERET CAPITAL LETTER SHORT AH - case 0x1040A => Array(0x10432) // DESERET CAPITAL LETTER SHORT O - case 0x1040B => Array(0x10433) // DESERET CAPITAL LETTER SHORT OO - case 0x1040C => Array(0x10434) // DESERET CAPITAL LETTER AY - case 0x1040D => Array(0x10435) // DESERET CAPITAL LETTER OW - case 0x1040E => Array(0x10436) // DESERET CAPITAL LETTER WU - case 0x1040F => Array(0x10437) // DESERET CAPITAL LETTER YEE - case 0x10410 => Array(0x10438) // DESERET CAPITAL LETTER H - case 0x10411 => Array(0x10439) // DESERET CAPITAL LETTER PEE - case 0x10412 => Array(0x1043A) // DESERET CAPITAL LETTER BEE - case 0x10413 => Array(0x1043B) // DESERET CAPITAL LETTER TEE - case 0x10414 => Array(0x1043C) // DESERET CAPITAL LETTER DEE - case 0x10415 => Array(0x1043D) // DESERET CAPITAL LETTER CHEE - case 0x10416 => Array(0x1043E) // DESERET CAPITAL LETTER JEE - case 0x10417 => Array(0x1043F) // DESERET CAPITAL LETTER KAY - case 0x10418 => Array(0x10440) // DESERET CAPITAL LETTER GAY - case 0x10419 => Array(0x10441) // DESERET CAPITAL LETTER EF - case 0x1041A => Array(0x10442) // DESERET CAPITAL LETTER VEE - case 0x1041B => Array(0x10443) // DESERET CAPITAL LETTER ETH - case 0x1041C => Array(0x10444) // DESERET CAPITAL LETTER THEE - case 0x1041D => Array(0x10445) // DESERET CAPITAL LETTER ES - case 0x1041E => Array(0x10446) // DESERET CAPITAL LETTER ZEE - case 0x1041F => Array(0x10447) // DESERET CAPITAL LETTER ESH - case 0x10420 => Array(0x10448) // DESERET CAPITAL LETTER ZHEE - case 0x10421 => Array(0x10449) // DESERET CAPITAL LETTER ER - case 0x10422 => Array(0x1044A) // DESERET CAPITAL LETTER EL - case 0x10423 => Array(0x1044B) // DESERET CAPITAL LETTER EM - case 0x10424 => Array(0x1044C) // DESERET CAPITAL LETTER EN - case 0x10425 => Array(0x1044D) // DESERET CAPITAL LETTER ENG - case 0x10426 => Array(0x1044E) // DESERET CAPITAL LETTER OI - case 0x10427 => Array(0x1044F) // DESERET CAPITAL LETTER EW - case 0x104B0 => Array(0x104D8) 
// OSAGE CAPITAL LETTER A - case 0x104B1 => Array(0x104D9) // OSAGE CAPITAL LETTER AI - case 0x104B2 => Array(0x104DA) // OSAGE CAPITAL LETTER AIN - case 0x104B3 => Array(0x104DB) // OSAGE CAPITAL LETTER AH - case 0x104B4 => Array(0x104DC) // OSAGE CAPITAL LETTER BRA - case 0x104B5 => Array(0x104DD) // OSAGE CAPITAL LETTER CHA - case 0x104B6 => Array(0x104DE) // OSAGE CAPITAL LETTER EHCHA - case 0x104B7 => Array(0x104DF) // OSAGE CAPITAL LETTER E - case 0x104B8 => Array(0x104E0) // OSAGE CAPITAL LETTER EIN - case 0x104B9 => Array(0x104E1) // OSAGE CAPITAL LETTER HA - case 0x104BA => Array(0x104E2) // OSAGE CAPITAL LETTER HYA - case 0x104BB => Array(0x104E3) // OSAGE CAPITAL LETTER I - case 0x104BC => Array(0x104E4) // OSAGE CAPITAL LETTER KA - case 0x104BD => Array(0x104E5) // OSAGE CAPITAL LETTER EHKA - case 0x104BE => Array(0x104E6) // OSAGE CAPITAL LETTER KYA - case 0x104BF => Array(0x104E7) // OSAGE CAPITAL LETTER LA - case 0x104C0 => Array(0x104E8) // OSAGE CAPITAL LETTER MA - case 0x104C1 => Array(0x104E9) // OSAGE CAPITAL LETTER NA - case 0x104C2 => Array(0x104EA) // OSAGE CAPITAL LETTER O - case 0x104C3 => Array(0x104EB) // OSAGE CAPITAL LETTER OIN - case 0x104C4 => Array(0x104EC) // OSAGE CAPITAL LETTER PA - case 0x104C5 => Array(0x104ED) // OSAGE CAPITAL LETTER EHPA - case 0x104C6 => Array(0x104EE) // OSAGE CAPITAL LETTER SA - case 0x104C7 => Array(0x104EF) // OSAGE CAPITAL LETTER SHA - case 0x104C8 => Array(0x104F0) // OSAGE CAPITAL LETTER TA - case 0x104C9 => Array(0x104F1) // OSAGE CAPITAL LETTER EHTA - case 0x104CA => Array(0x104F2) // OSAGE CAPITAL LETTER TSA - case 0x104CB => Array(0x104F3) // OSAGE CAPITAL LETTER EHTSA - case 0x104CC => Array(0x104F4) // OSAGE CAPITAL LETTER TSHA - case 0x104CD => Array(0x104F5) // OSAGE CAPITAL LETTER DHA - case 0x104CE => Array(0x104F6) // OSAGE CAPITAL LETTER U - case 0x104CF => Array(0x104F7) // OSAGE CAPITAL LETTER WA - case 0x104D0 => Array(0x104F8) // OSAGE CAPITAL LETTER KHA - case 0x104D1 => Array(0x104F9) 
// OSAGE CAPITAL LETTER GHA - case 0x104D2 => Array(0x104FA) // OSAGE CAPITAL LETTER ZA - case 0x104D3 => Array(0x104FB) // OSAGE CAPITAL LETTER ZHA - case 0x10570 => Array(0x10597) // VITHKUQI CAPITAL LETTER A - case 0x10571 => Array(0x10598) // VITHKUQI CAPITAL LETTER BBE - case 0x10572 => Array(0x10599) // VITHKUQI CAPITAL LETTER BE - case 0x10573 => Array(0x1059A) // VITHKUQI CAPITAL LETTER CE - case 0x10574 => Array(0x1059B) // VITHKUQI CAPITAL LETTER CHE - case 0x10575 => Array(0x1059C) // VITHKUQI CAPITAL LETTER DE - case 0x10576 => Array(0x1059D) // VITHKUQI CAPITAL LETTER DHE - case 0x10577 => Array(0x1059E) // VITHKUQI CAPITAL LETTER EI - case 0x10578 => Array(0x1059F) // VITHKUQI CAPITAL LETTER E - case 0x10579 => Array(0x105A0) // VITHKUQI CAPITAL LETTER FE - case 0x1057A => Array(0x105A1) // VITHKUQI CAPITAL LETTER GA - case 0x1057C => Array(0x105A3) // VITHKUQI CAPITAL LETTER HA - case 0x1057D => Array(0x105A4) // VITHKUQI CAPITAL LETTER HHA - case 0x1057E => Array(0x105A5) // VITHKUQI CAPITAL LETTER I - case 0x1057F => Array(0x105A6) // VITHKUQI CAPITAL LETTER IJE - case 0x10580 => Array(0x105A7) // VITHKUQI CAPITAL LETTER JE - case 0x10581 => Array(0x105A8) // VITHKUQI CAPITAL LETTER KA - case 0x10582 => Array(0x105A9) // VITHKUQI CAPITAL LETTER LA - case 0x10583 => Array(0x105AA) // VITHKUQI CAPITAL LETTER LLA - case 0x10584 => Array(0x105AB) // VITHKUQI CAPITAL LETTER ME - case 0x10585 => Array(0x105AC) // VITHKUQI CAPITAL LETTER NE - case 0x10586 => Array(0x105AD) // VITHKUQI CAPITAL LETTER NJE - case 0x10587 => Array(0x105AE) // VITHKUQI CAPITAL LETTER O - case 0x10588 => Array(0x105AF) // VITHKUQI CAPITAL LETTER PE - case 0x10589 => Array(0x105B0) // VITHKUQI CAPITAL LETTER QA - case 0x1058A => Array(0x105B1) // VITHKUQI CAPITAL LETTER RE - case 0x1058C => Array(0x105B3) // VITHKUQI CAPITAL LETTER SE - case 0x1058D => Array(0x105B4) // VITHKUQI CAPITAL LETTER SHE - case 0x1058E => Array(0x105B5) // VITHKUQI CAPITAL LETTER TE - case 0x1058F => 
Array(0x105B6) // VITHKUQI CAPITAL LETTER THE - case 0x10590 => Array(0x105B7) // VITHKUQI CAPITAL LETTER U - case 0x10591 => Array(0x105B8) // VITHKUQI CAPITAL LETTER VE - case 0x10592 => Array(0x105B9) // VITHKUQI CAPITAL LETTER XE - case 0x10594 => Array(0x105BB) // VITHKUQI CAPITAL LETTER Y - case 0x10595 => Array(0x105BC) // VITHKUQI CAPITAL LETTER ZE - case 0x10C80 => Array(0x10CC0) // OLD HUNGARIAN CAPITAL LETTER A - case 0x10C81 => Array(0x10CC1) // OLD HUNGARIAN CAPITAL LETTER AA - case 0x10C82 => Array(0x10CC2) // OLD HUNGARIAN CAPITAL LETTER EB - case 0x10C83 => Array(0x10CC3) // OLD HUNGARIAN CAPITAL LETTER AMB - case 0x10C84 => Array(0x10CC4) // OLD HUNGARIAN CAPITAL LETTER EC - case 0x10C85 => Array(0x10CC5) // OLD HUNGARIAN CAPITAL LETTER ENC - case 0x10C86 => Array(0x10CC6) // OLD HUNGARIAN CAPITAL LETTER ECS - case 0x10C87 => Array(0x10CC7) // OLD HUNGARIAN CAPITAL LETTER ED - case 0x10C88 => Array(0x10CC8) // OLD HUNGARIAN CAPITAL LETTER AND - case 0x10C89 => Array(0x10CC9) // OLD HUNGARIAN CAPITAL LETTER E - case 0x10C8A => Array(0x10CCA) // OLD HUNGARIAN CAPITAL LETTER CLOSE E - case 0x10C8B => Array(0x10CCB) // OLD HUNGARIAN CAPITAL LETTER EE - case 0x10C8C => Array(0x10CCC) // OLD HUNGARIAN CAPITAL LETTER EF - case 0x10C8D => Array(0x10CCD) // OLD HUNGARIAN CAPITAL LETTER EG - case 0x10C8E => Array(0x10CCE) // OLD HUNGARIAN CAPITAL LETTER EGY - case 0x10C8F => Array(0x10CCF) // OLD HUNGARIAN CAPITAL LETTER EH - case 0x10C90 => Array(0x10CD0) // OLD HUNGARIAN CAPITAL LETTER I - case 0x10C91 => Array(0x10CD1) // OLD HUNGARIAN CAPITAL LETTER II - case 0x10C92 => Array(0x10CD2) // OLD HUNGARIAN CAPITAL LETTER EJ - case 0x10C93 => Array(0x10CD3) // OLD HUNGARIAN CAPITAL LETTER EK - case 0x10C94 => Array(0x10CD4) // OLD HUNGARIAN CAPITAL LETTER AK - case 0x10C95 => Array(0x10CD5) // OLD HUNGARIAN CAPITAL LETTER UNK - case 0x10C96 => Array(0x10CD6) // OLD HUNGARIAN CAPITAL LETTER EL - case 0x10C97 => Array(0x10CD7) // OLD HUNGARIAN CAPITAL LETTER ELY 
- case 0x10C98 => Array(0x10CD8) // OLD HUNGARIAN CAPITAL LETTER EM - case 0x10C99 => Array(0x10CD9) // OLD HUNGARIAN CAPITAL LETTER EN - case 0x10C9A => Array(0x10CDA) // OLD HUNGARIAN CAPITAL LETTER ENY - case 0x10C9B => Array(0x10CDB) // OLD HUNGARIAN CAPITAL LETTER O - case 0x10C9C => Array(0x10CDC) // OLD HUNGARIAN CAPITAL LETTER OO - case 0x10C9D => Array(0x10CDD) // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE - case 0x10C9E => Array(0x10CDE) // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE - case 0x10C9F => Array(0x10CDF) // OLD HUNGARIAN CAPITAL LETTER OEE - case 0x10CA0 => Array(0x10CE0) // OLD HUNGARIAN CAPITAL LETTER EP - case 0x10CA1 => Array(0x10CE1) // OLD HUNGARIAN CAPITAL LETTER EMP - case 0x10CA2 => Array(0x10CE2) // OLD HUNGARIAN CAPITAL LETTER ER - case 0x10CA3 => Array(0x10CE3) // OLD HUNGARIAN CAPITAL LETTER SHORT ER - case 0x10CA4 => Array(0x10CE4) // OLD HUNGARIAN CAPITAL LETTER ES - case 0x10CA5 => Array(0x10CE5) // OLD HUNGARIAN CAPITAL LETTER ESZ - case 0x10CA6 => Array(0x10CE6) // OLD HUNGARIAN CAPITAL LETTER ET - case 0x10CA7 => Array(0x10CE7) // OLD HUNGARIAN CAPITAL LETTER ENT - case 0x10CA8 => Array(0x10CE8) // OLD HUNGARIAN CAPITAL LETTER ETY - case 0x10CA9 => Array(0x10CE9) // OLD HUNGARIAN CAPITAL LETTER ECH - case 0x10CAA => Array(0x10CEA) // OLD HUNGARIAN CAPITAL LETTER U - case 0x10CAB => Array(0x10CEB) // OLD HUNGARIAN CAPITAL LETTER UU - case 0x10CAC => Array(0x10CEC) // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE - case 0x10CAD => Array(0x10CED) // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE - case 0x10CAE => Array(0x10CEE) // OLD HUNGARIAN CAPITAL LETTER EV - case 0x10CAF => Array(0x10CEF) // OLD HUNGARIAN CAPITAL LETTER EZ - case 0x10CB0 => Array(0x10CF0) // OLD HUNGARIAN CAPITAL LETTER EZS - case 0x10CB1 => Array(0x10CF1) // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN - case 0x10CB2 => Array(0x10CF2) // OLD HUNGARIAN CAPITAL LETTER US - case 0x118A0 => Array(0x118C0) // WARANG CITI CAPITAL LETTER NGAA - case 0x118A1 => 
Array(0x118C1) // WARANG CITI CAPITAL LETTER A - case 0x118A2 => Array(0x118C2) // WARANG CITI CAPITAL LETTER WI - case 0x118A3 => Array(0x118C3) // WARANG CITI CAPITAL LETTER YU - case 0x118A4 => Array(0x118C4) // WARANG CITI CAPITAL LETTER YA - case 0x118A5 => Array(0x118C5) // WARANG CITI CAPITAL LETTER YO - case 0x118A6 => Array(0x118C6) // WARANG CITI CAPITAL LETTER II - case 0x118A7 => Array(0x118C7) // WARANG CITI CAPITAL LETTER UU - case 0x118A8 => Array(0x118C8) // WARANG CITI CAPITAL LETTER E - case 0x118A9 => Array(0x118C9) // WARANG CITI CAPITAL LETTER O - case 0x118AA => Array(0x118CA) // WARANG CITI CAPITAL LETTER ANG - case 0x118AB => Array(0x118CB) // WARANG CITI CAPITAL LETTER GA - case 0x118AC => Array(0x118CC) // WARANG CITI CAPITAL LETTER KO - case 0x118AD => Array(0x118CD) // WARANG CITI CAPITAL LETTER ENY - case 0x118AE => Array(0x118CE) // WARANG CITI CAPITAL LETTER YUJ - case 0x118AF => Array(0x118CF) // WARANG CITI CAPITAL LETTER UC - case 0x118B0 => Array(0x118D0) // WARANG CITI CAPITAL LETTER ENN - case 0x118B1 => Array(0x118D1) // WARANG CITI CAPITAL LETTER ODD - case 0x118B2 => Array(0x118D2) // WARANG CITI CAPITAL LETTER TTE - case 0x118B3 => Array(0x118D3) // WARANG CITI CAPITAL LETTER NUNG - case 0x118B4 => Array(0x118D4) // WARANG CITI CAPITAL LETTER DA - case 0x118B5 => Array(0x118D5) // WARANG CITI CAPITAL LETTER AT - case 0x118B6 => Array(0x118D6) // WARANG CITI CAPITAL LETTER AM - case 0x118B7 => Array(0x118D7) // WARANG CITI CAPITAL LETTER BU - case 0x118B8 => Array(0x118D8) // WARANG CITI CAPITAL LETTER PU - case 0x118B9 => Array(0x118D9) // WARANG CITI CAPITAL LETTER HIYO - case 0x118BA => Array(0x118DA) // WARANG CITI CAPITAL LETTER HOLO - case 0x118BB => Array(0x118DB) // WARANG CITI CAPITAL LETTER HORR - case 0x118BC => Array(0x118DC) // WARANG CITI CAPITAL LETTER HAR - case 0x118BD => Array(0x118DD) // WARANG CITI CAPITAL LETTER SSUU - case 0x118BE => Array(0x118DE) // WARANG CITI CAPITAL LETTER SII - case 0x118BF => 
Array(0x118DF) // WARANG CITI CAPITAL LETTER VIYO - case 0x16E40 => Array(0x16E60) // MEDEFAIDRIN CAPITAL LETTER M - case 0x16E41 => Array(0x16E61) // MEDEFAIDRIN CAPITAL LETTER S - case 0x16E42 => Array(0x16E62) // MEDEFAIDRIN CAPITAL LETTER V - case 0x16E43 => Array(0x16E63) // MEDEFAIDRIN CAPITAL LETTER W - case 0x16E44 => Array(0x16E64) // MEDEFAIDRIN CAPITAL LETTER ATIU - case 0x16E45 => Array(0x16E65) // MEDEFAIDRIN CAPITAL LETTER Z - case 0x16E46 => Array(0x16E66) // MEDEFAIDRIN CAPITAL LETTER KP - case 0x16E47 => Array(0x16E67) // MEDEFAIDRIN CAPITAL LETTER P - case 0x16E48 => Array(0x16E68) // MEDEFAIDRIN CAPITAL LETTER T - case 0x16E49 => Array(0x16E69) // MEDEFAIDRIN CAPITAL LETTER G - case 0x16E4A => Array(0x16E6A) // MEDEFAIDRIN CAPITAL LETTER F - case 0x16E4B => Array(0x16E6B) // MEDEFAIDRIN CAPITAL LETTER I - case 0x16E4C => Array(0x16E6C) // MEDEFAIDRIN CAPITAL LETTER K - case 0x16E4D => Array(0x16E6D) // MEDEFAIDRIN CAPITAL LETTER A - case 0x16E4E => Array(0x16E6E) // MEDEFAIDRIN CAPITAL LETTER J - case 0x16E4F => Array(0x16E6F) // MEDEFAIDRIN CAPITAL LETTER E - case 0x16E50 => Array(0x16E70) // MEDEFAIDRIN CAPITAL LETTER B - case 0x16E51 => Array(0x16E71) // MEDEFAIDRIN CAPITAL LETTER C - case 0x16E52 => Array(0x16E72) // MEDEFAIDRIN CAPITAL LETTER U - case 0x16E53 => Array(0x16E73) // MEDEFAIDRIN CAPITAL LETTER YU - case 0x16E54 => Array(0x16E74) // MEDEFAIDRIN CAPITAL LETTER L - case 0x16E55 => Array(0x16E75) // MEDEFAIDRIN CAPITAL LETTER Q - case 0x16E56 => Array(0x16E76) // MEDEFAIDRIN CAPITAL LETTER HP - case 0x16E57 => Array(0x16E77) // MEDEFAIDRIN CAPITAL LETTER NY - case 0x16E58 => Array(0x16E78) // MEDEFAIDRIN CAPITAL LETTER X - case 0x16E59 => Array(0x16E79) // MEDEFAIDRIN CAPITAL LETTER D - case 0x16E5A => Array(0x16E7A) // MEDEFAIDRIN CAPITAL LETTER OE - case 0x16E5B => Array(0x16E7B) // MEDEFAIDRIN CAPITAL LETTER N - case 0x16E5C => Array(0x16E7C) // MEDEFAIDRIN CAPITAL LETTER R - case 0x16E5D => Array(0x16E7D) // MEDEFAIDRIN CAPITAL 
LETTER O - case 0x16E5E => Array(0x16E7E) // MEDEFAIDRIN CAPITAL LETTER AI - case 0x16E5F => Array(0x16E7F) // MEDEFAIDRIN CAPITAL LETTER Y - case 0x1E900 => Array(0x1E922) // ADLAM CAPITAL LETTER ALIF - case 0x1E901 => Array(0x1E923) // ADLAM CAPITAL LETTER DAALI - case 0x1E902 => Array(0x1E924) // ADLAM CAPITAL LETTER LAAM - case 0x1E903 => Array(0x1E925) // ADLAM CAPITAL LETTER MIIM - case 0x1E904 => Array(0x1E926) // ADLAM CAPITAL LETTER BA - case 0x1E905 => Array(0x1E927) // ADLAM CAPITAL LETTER SINNYIIYHE - case 0x1E906 => Array(0x1E928) // ADLAM CAPITAL LETTER PE - case 0x1E907 => Array(0x1E929) // ADLAM CAPITAL LETTER BHE - case 0x1E908 => Array(0x1E92A) // ADLAM CAPITAL LETTER RA - case 0x1E909 => Array(0x1E92B) // ADLAM CAPITAL LETTER E - case 0x1E90A => Array(0x1E92C) // ADLAM CAPITAL LETTER FA - case 0x1E90B => Array(0x1E92D) // ADLAM CAPITAL LETTER I - case 0x1E90C => Array(0x1E92E) // ADLAM CAPITAL LETTER O - case 0x1E90D => Array(0x1E92F) // ADLAM CAPITAL LETTER DHA - case 0x1E90E => Array(0x1E930) // ADLAM CAPITAL LETTER YHE - case 0x1E90F => Array(0x1E931) // ADLAM CAPITAL LETTER WAW - case 0x1E910 => Array(0x1E932) // ADLAM CAPITAL LETTER NUN - case 0x1E911 => Array(0x1E933) // ADLAM CAPITAL LETTER KAF - case 0x1E912 => Array(0x1E934) // ADLAM CAPITAL LETTER YA - case 0x1E913 => Array(0x1E935) // ADLAM CAPITAL LETTER U - case 0x1E914 => Array(0x1E936) // ADLAM CAPITAL LETTER JIIM - case 0x1E915 => Array(0x1E937) // ADLAM CAPITAL LETTER CHI - case 0x1E916 => Array(0x1E938) // ADLAM CAPITAL LETTER HA - case 0x1E917 => Array(0x1E939) // ADLAM CAPITAL LETTER QAAF - case 0x1E918 => Array(0x1E93A) // ADLAM CAPITAL LETTER GA - case 0x1E919 => Array(0x1E93B) // ADLAM CAPITAL LETTER NYA - case 0x1E91A => Array(0x1E93C) // ADLAM CAPITAL LETTER TU - case 0x1E91B => Array(0x1E93D) // ADLAM CAPITAL LETTER NHA - case 0x1E91C => Array(0x1E93E) // ADLAM CAPITAL LETTER VA - case 0x1E91D => Array(0x1E93F) // ADLAM CAPITAL LETTER KHA - case 0x1E91E => Array(0x1E940) 
// ADLAM CAPITAL LETTER GBE - case 0x1E91F => Array(0x1E941) // ADLAM CAPITAL LETTER ZAL - case 0x1E920 => Array(0x1E942) // ADLAM CAPITAL LETTER KPO - case 0x1E921 => Array(0x1E943) // ADLAM CAPITAL LETTER SHA case 0x00DF => Array(0x0073, 0x0073) // LATIN SMALL LETTER SHARP S case 0x0130 => Array(0x0069, 0x0307) // LATIN CAPITAL LETTER I WITH DOT ABOVE case 0x0149 => Array(0x02BC, 0x006E) // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE @@ -1545,6 +163,1483 @@ private[ci] object CaseFolding { case 0xFB15 => Array(0x0574, 0x056B) // ARMENIAN SMALL LIGATURE MEN INI case 0xFB16 => Array(0x057E, 0x0576) // ARMENIAN SMALL LIGATURE VEW NOW case 0xFB17 => Array(0x0574, 0x056D) // ARMENIAN SMALL LIGATURE MEN XEH - case _ => Array(codePoint) // All others map to themselves + case _ => // The full rules defer to the common rules + Array(commonCaseFoldedCodePoints(codePoint)) + } + + /** This function transforms a Unicode codePoint into its simple case folded + * variant using the default rules. + * + * It is equivalent to the "C + S" rules from `CaseFolding.txt`.
+ */ + def simpleCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x1E9E => 0x00DF // LATIN CAPITAL LETTER SHARP S + case 0x1F88 => 0x1F80 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1F89 => 0x1F81 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1F8A => 0x1F82 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F8B => 0x1F83 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F8C => 0x1F84 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F8D => 0x1F85 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F8E => 0x1F86 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F8F => 0x1F87 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F98 => 0x1F90 // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1F99 => 0x1F91 // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1F9A => 0x1F92 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1F9B => 0x1F93 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1F9C => 0x1F94 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1F9D => 0x1F95 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1F9E => 0x1F96 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1F9F => 0x1F97 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FA8 => 0x1FA0 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1FA9 => 0x1FA1 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1FAA => 0x1FA2 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1FAB => 0x1FA3 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1FAC => 0x1FA4 // GREEK 
CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1FAD => 0x1FA5 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1FAE => 0x1FA6 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FAF => 0x1FA7 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1FBC => 0x1FB3 // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1FCC => 0x1FC3 // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1FFC => 0x1FF3 // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case _ => commonCaseFoldedCodePoints(codePoint) + } + + /** This function transforms a Unicode codePoint into its common case folded + * form. + * + * This lookup can only be validly used in concert with either the simple + * or full case folding rules (with or without the special cases for some + * Turkic languages). This is why this function is `private`. + */ + private def commonCaseFoldedCodePoints(codePoint: Int): Int = + codePoint match { + case 0x0041 => 0x0061 // LATIN CAPITAL LETTER A + case 0x0042 => 0x0062 // LATIN CAPITAL LETTER B + case 0x0043 => 0x0063 // LATIN CAPITAL LETTER C + case 0x0044 => 0x0064 // LATIN CAPITAL LETTER D + case 0x0045 => 0x0065 // LATIN CAPITAL LETTER E + case 0x0046 => 0x0066 // LATIN CAPITAL LETTER F + case 0x0047 => 0x0067 // LATIN CAPITAL LETTER G + case 0x0048 => 0x0068 // LATIN CAPITAL LETTER H + case 0x0049 => 0x0069 // LATIN CAPITAL LETTER I + case 0x004A => 0x006A // LATIN CAPITAL LETTER J + case 0x004B => 0x006B // LATIN CAPITAL LETTER K + case 0x004C => 0x006C // LATIN CAPITAL LETTER L + case 0x004D => 0x006D // LATIN CAPITAL LETTER M + case 0x004E => 0x006E // LATIN CAPITAL LETTER N + case 0x004F => 0x006F // LATIN CAPITAL LETTER O + case 0x0050 => 0x0070 // LATIN CAPITAL LETTER P + case 0x0051 => 0x0071 // LATIN CAPITAL LETTER Q + case 0x0052 => 0x0072 // LATIN CAPITAL LETTER R + case 0x0053 => 0x0073 // LATIN CAPITAL LETTER S + case 0x0054 => 0x0074
// LATIN CAPITAL LETTER T + case 0x0055 => 0x0075 // LATIN CAPITAL LETTER U + case 0x0056 => 0x0076 // LATIN CAPITAL LETTER V + case 0x0057 => 0x0077 // LATIN CAPITAL LETTER W + case 0x0058 => 0x0078 // LATIN CAPITAL LETTER X + case 0x0059 => 0x0079 // LATIN CAPITAL LETTER Y + case 0x005A => 0x007A // LATIN CAPITAL LETTER Z + case 0x00B5 => 0x03BC // MICRO SIGN + case 0x00C0 => 0x00E0 // LATIN CAPITAL LETTER A WITH GRAVE + case 0x00C1 => 0x00E1 // LATIN CAPITAL LETTER A WITH ACUTE + case 0x00C2 => 0x00E2 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + case 0x00C3 => 0x00E3 // LATIN CAPITAL LETTER A WITH TILDE + case 0x00C4 => 0x00E4 // LATIN CAPITAL LETTER A WITH DIAERESIS + case 0x00C5 => 0x00E5 // LATIN CAPITAL LETTER A WITH RING ABOVE + case 0x00C6 => 0x00E6 // LATIN CAPITAL LETTER AE + case 0x00C7 => 0x00E7 // LATIN CAPITAL LETTER C WITH CEDILLA + case 0x00C8 => 0x00E8 // LATIN CAPITAL LETTER E WITH GRAVE + case 0x00C9 => 0x00E9 // LATIN CAPITAL LETTER E WITH ACUTE + case 0x00CA => 0x00EA // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + case 0x00CB => 0x00EB // LATIN CAPITAL LETTER E WITH DIAERESIS + case 0x00CC => 0x00EC // LATIN CAPITAL LETTER I WITH GRAVE + case 0x00CD => 0x00ED // LATIN CAPITAL LETTER I WITH ACUTE + case 0x00CE => 0x00EE // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + case 0x00CF => 0x00EF // LATIN CAPITAL LETTER I WITH DIAERESIS + case 0x00D0 => 0x00F0 // LATIN CAPITAL LETTER ETH + case 0x00D1 => 0x00F1 // LATIN CAPITAL LETTER N WITH TILDE + case 0x00D2 => 0x00F2 // LATIN CAPITAL LETTER O WITH GRAVE + case 0x00D3 => 0x00F3 // LATIN CAPITAL LETTER O WITH ACUTE + case 0x00D4 => 0x00F4 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + case 0x00D5 => 0x00F5 // LATIN CAPITAL LETTER O WITH TILDE + case 0x00D6 => 0x00F6 // LATIN CAPITAL LETTER O WITH DIAERESIS + case 0x00D8 => 0x00F8 // LATIN CAPITAL LETTER O WITH STROKE + case 0x00D9 => 0x00F9 // LATIN CAPITAL LETTER U WITH GRAVE + case 0x00DA => 0x00FA // LATIN CAPITAL LETTER U WITH ACUTE + case 0x00DB => 0x00FB 
// LATIN CAPITAL LETTER U WITH CIRCUMFLEX + case 0x00DC => 0x00FC // LATIN CAPITAL LETTER U WITH DIAERESIS + case 0x00DD => 0x00FD // LATIN CAPITAL LETTER Y WITH ACUTE + case 0x00DE => 0x00FE // LATIN CAPITAL LETTER THORN + case 0x0100 => 0x0101 // LATIN CAPITAL LETTER A WITH MACRON + case 0x0102 => 0x0103 // LATIN CAPITAL LETTER A WITH BREVE + case 0x0104 => 0x0105 // LATIN CAPITAL LETTER A WITH OGONEK + case 0x0106 => 0x0107 // LATIN CAPITAL LETTER C WITH ACUTE + case 0x0108 => 0x0109 // LATIN CAPITAL LETTER C WITH CIRCUMFLEX + case 0x010A => 0x010B // LATIN CAPITAL LETTER C WITH DOT ABOVE + case 0x010C => 0x010D // LATIN CAPITAL LETTER C WITH CARON + case 0x010E => 0x010F // LATIN CAPITAL LETTER D WITH CARON + case 0x0110 => 0x0111 // LATIN CAPITAL LETTER D WITH STROKE + case 0x0112 => 0x0113 // LATIN CAPITAL LETTER E WITH MACRON + case 0x0114 => 0x0115 // LATIN CAPITAL LETTER E WITH BREVE + case 0x0116 => 0x0117 // LATIN CAPITAL LETTER E WITH DOT ABOVE + case 0x0118 => 0x0119 // LATIN CAPITAL LETTER E WITH OGONEK + case 0x011A => 0x011B // LATIN CAPITAL LETTER E WITH CARON + case 0x011C => 0x011D // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + case 0x011E => 0x011F // LATIN CAPITAL LETTER G WITH BREVE + case 0x0120 => 0x0121 // LATIN CAPITAL LETTER G WITH DOT ABOVE + case 0x0122 => 0x0123 // LATIN CAPITAL LETTER G WITH CEDILLA + case 0x0124 => 0x0125 // LATIN CAPITAL LETTER H WITH CIRCUMFLEX + case 0x0126 => 0x0127 // LATIN CAPITAL LETTER H WITH STROKE + case 0x0128 => 0x0129 // LATIN CAPITAL LETTER I WITH TILDE + case 0x012A => 0x012B // LATIN CAPITAL LETTER I WITH MACRON + case 0x012C => 0x012D // LATIN CAPITAL LETTER I WITH BREVE + case 0x012E => 0x012F // LATIN CAPITAL LETTER I WITH OGONEK + case 0x0132 => 0x0133 // LATIN CAPITAL LIGATURE IJ + case 0x0134 => 0x0135 // LATIN CAPITAL LETTER J WITH CIRCUMFLEX + case 0x0136 => 0x0137 // LATIN CAPITAL LETTER K WITH CEDILLA + case 0x0139 => 0x013A // LATIN CAPITAL LETTER L WITH ACUTE + case 0x013B => 0x013C // LATIN 
CAPITAL LETTER L WITH CEDILLA + case 0x013D => 0x013E // LATIN CAPITAL LETTER L WITH CARON + case 0x013F => 0x0140 // LATIN CAPITAL LETTER L WITH MIDDLE DOT + case 0x0141 => 0x0142 // LATIN CAPITAL LETTER L WITH STROKE + case 0x0143 => 0x0144 // LATIN CAPITAL LETTER N WITH ACUTE + case 0x0145 => 0x0146 // LATIN CAPITAL LETTER N WITH CEDILLA + case 0x0147 => 0x0148 // LATIN CAPITAL LETTER N WITH CARON + case 0x014A => 0x014B // LATIN CAPITAL LETTER ENG + case 0x014C => 0x014D // LATIN CAPITAL LETTER O WITH MACRON + case 0x014E => 0x014F // LATIN CAPITAL LETTER O WITH BREVE + case 0x0150 => 0x0151 // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + case 0x0152 => 0x0153 // LATIN CAPITAL LIGATURE OE + case 0x0154 => 0x0155 // LATIN CAPITAL LETTER R WITH ACUTE + case 0x0156 => 0x0157 // LATIN CAPITAL LETTER R WITH CEDILLA + case 0x0158 => 0x0159 // LATIN CAPITAL LETTER R WITH CARON + case 0x015A => 0x015B // LATIN CAPITAL LETTER S WITH ACUTE + case 0x015C => 0x015D // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + case 0x015E => 0x015F // LATIN CAPITAL LETTER S WITH CEDILLA + case 0x0160 => 0x0161 // LATIN CAPITAL LETTER S WITH CARON + case 0x0162 => 0x0163 // LATIN CAPITAL LETTER T WITH CEDILLA + case 0x0164 => 0x0165 // LATIN CAPITAL LETTER T WITH CARON + case 0x0166 => 0x0167 // LATIN CAPITAL LETTER T WITH STROKE + case 0x0168 => 0x0169 // LATIN CAPITAL LETTER U WITH TILDE + case 0x016A => 0x016B // LATIN CAPITAL LETTER U WITH MACRON + case 0x016C => 0x016D // LATIN CAPITAL LETTER U WITH BREVE + case 0x016E => 0x016F // LATIN CAPITAL LETTER U WITH RING ABOVE + case 0x0170 => 0x0171 // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x0172 => 0x0173 // LATIN CAPITAL LETTER U WITH OGONEK + case 0x0174 => 0x0175 // LATIN CAPITAL LETTER W WITH CIRCUMFLEX + case 0x0176 => 0x0177 // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + case 0x0178 => 0x00FF // LATIN CAPITAL LETTER Y WITH DIAERESIS + case 0x0179 => 0x017A // LATIN CAPITAL LETTER Z WITH ACUTE + case 0x017B => 0x017C // LATIN 
CAPITAL LETTER Z WITH DOT ABOVE + case 0x017D => 0x017E // LATIN CAPITAL LETTER Z WITH CARON + case 0x017F => 0x0073 // LATIN SMALL LETTER LONG S + case 0x0181 => 0x0253 // LATIN CAPITAL LETTER B WITH HOOK + case 0x0182 => 0x0183 // LATIN CAPITAL LETTER B WITH TOPBAR + case 0x0184 => 0x0185 // LATIN CAPITAL LETTER TONE SIX + case 0x0186 => 0x0254 // LATIN CAPITAL LETTER OPEN O + case 0x0187 => 0x0188 // LATIN CAPITAL LETTER C WITH HOOK + case 0x0189 => 0x0256 // LATIN CAPITAL LETTER AFRICAN D + case 0x018A => 0x0257 // LATIN CAPITAL LETTER D WITH HOOK + case 0x018B => 0x018C // LATIN CAPITAL LETTER D WITH TOPBAR + case 0x018E => 0x01DD // LATIN CAPITAL LETTER REVERSED E + case 0x018F => 0x0259 // LATIN CAPITAL LETTER SCHWA + case 0x0190 => 0x025B // LATIN CAPITAL LETTER OPEN E + case 0x0191 => 0x0192 // LATIN CAPITAL LETTER F WITH HOOK + case 0x0193 => 0x0260 // LATIN CAPITAL LETTER G WITH HOOK + case 0x0194 => 0x0263 // LATIN CAPITAL LETTER GAMMA + case 0x0196 => 0x0269 // LATIN CAPITAL LETTER IOTA + case 0x0197 => 0x0268 // LATIN CAPITAL LETTER I WITH STROKE + case 0x0198 => 0x0199 // LATIN CAPITAL LETTER K WITH HOOK + case 0x019C => 0x026F // LATIN CAPITAL LETTER TURNED M + case 0x019D => 0x0272 // LATIN CAPITAL LETTER N WITH LEFT HOOK + case 0x019F => 0x0275 // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + case 0x01A0 => 0x01A1 // LATIN CAPITAL LETTER O WITH HORN + case 0x01A2 => 0x01A3 // LATIN CAPITAL LETTER OI + case 0x01A4 => 0x01A5 // LATIN CAPITAL LETTER P WITH HOOK + case 0x01A6 => 0x0280 // LATIN LETTER YR + case 0x01A7 => 0x01A8 // LATIN CAPITAL LETTER TONE TWO + case 0x01A9 => 0x0283 // LATIN CAPITAL LETTER ESH + case 0x01AC => 0x01AD // LATIN CAPITAL LETTER T WITH HOOK + case 0x01AE => 0x0288 // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + case 0x01AF => 0x01B0 // LATIN CAPITAL LETTER U WITH HORN + case 0x01B1 => 0x028A // LATIN CAPITAL LETTER UPSILON + case 0x01B2 => 0x028B // LATIN CAPITAL LETTER V WITH HOOK + case 0x01B3 => 0x01B4 // LATIN CAPITAL 
LETTER Y WITH HOOK + case 0x01B5 => 0x01B6 // LATIN CAPITAL LETTER Z WITH STROKE + case 0x01B7 => 0x0292 // LATIN CAPITAL LETTER EZH + case 0x01B8 => 0x01B9 // LATIN CAPITAL LETTER EZH REVERSED + case 0x01BC => 0x01BD // LATIN CAPITAL LETTER TONE FIVE + case 0x01C4 => 0x01C6 // LATIN CAPITAL LETTER DZ WITH CARON + case 0x01C5 => 0x01C6 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + case 0x01C7 => 0x01C9 // LATIN CAPITAL LETTER LJ + case 0x01C8 => 0x01C9 // LATIN CAPITAL LETTER L WITH SMALL LETTER J + case 0x01CA => 0x01CC // LATIN CAPITAL LETTER NJ + case 0x01CB => 0x01CC // LATIN CAPITAL LETTER N WITH SMALL LETTER J + case 0x01CD => 0x01CE // LATIN CAPITAL LETTER A WITH CARON + case 0x01CF => 0x01D0 // LATIN CAPITAL LETTER I WITH CARON + case 0x01D1 => 0x01D2 // LATIN CAPITAL LETTER O WITH CARON + case 0x01D3 => 0x01D4 // LATIN CAPITAL LETTER U WITH CARON + case 0x01D5 => 0x01D6 // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + case 0x01D7 => 0x01D8 // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + case 0x01D9 => 0x01DA // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + case 0x01DB => 0x01DC // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + case 0x01DE => 0x01DF // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + case 0x01E0 => 0x01E1 // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + case 0x01E2 => 0x01E3 // LATIN CAPITAL LETTER AE WITH MACRON + case 0x01E4 => 0x01E5 // LATIN CAPITAL LETTER G WITH STROKE + case 0x01E6 => 0x01E7 // LATIN CAPITAL LETTER G WITH CARON + case 0x01E8 => 0x01E9 // LATIN CAPITAL LETTER K WITH CARON + case 0x01EA => 0x01EB // LATIN CAPITAL LETTER O WITH OGONEK + case 0x01EC => 0x01ED // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + case 0x01EE => 0x01EF // LATIN CAPITAL LETTER EZH WITH CARON + case 0x01F1 => 0x01F3 // LATIN CAPITAL LETTER DZ + case 0x01F2 => 0x01F3 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z + case 0x01F4 => 0x01F5 // LATIN CAPITAL LETTER G WITH ACUTE + case 0x01F6 => 0x0195 // LATIN 
CAPITAL LETTER HWAIR + case 0x01F7 => 0x01BF // LATIN CAPITAL LETTER WYNN + case 0x01F8 => 0x01F9 // LATIN CAPITAL LETTER N WITH GRAVE + case 0x01FA => 0x01FB // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + case 0x01FC => 0x01FD // LATIN CAPITAL LETTER AE WITH ACUTE + case 0x01FE => 0x01FF // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + case 0x0200 => 0x0201 // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + case 0x0202 => 0x0203 // LATIN CAPITAL LETTER A WITH INVERTED BREVE + case 0x0204 => 0x0205 // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + case 0x0206 => 0x0207 // LATIN CAPITAL LETTER E WITH INVERTED BREVE + case 0x0208 => 0x0209 // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + case 0x020A => 0x020B // LATIN CAPITAL LETTER I WITH INVERTED BREVE + case 0x020C => 0x020D // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + case 0x020E => 0x020F // LATIN CAPITAL LETTER O WITH INVERTED BREVE + case 0x0210 => 0x0211 // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + case 0x0212 => 0x0213 // LATIN CAPITAL LETTER R WITH INVERTED BREVE + case 0x0214 => 0x0215 // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + case 0x0216 => 0x0217 // LATIN CAPITAL LETTER U WITH INVERTED BREVE + case 0x0218 => 0x0219 // LATIN CAPITAL LETTER S WITH COMMA BELOW + case 0x021A => 0x021B // LATIN CAPITAL LETTER T WITH COMMA BELOW + case 0x021C => 0x021D // LATIN CAPITAL LETTER YOGH + case 0x021E => 0x021F // LATIN CAPITAL LETTER H WITH CARON + case 0x0220 => 0x019E // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + case 0x0222 => 0x0223 // LATIN CAPITAL LETTER OU + case 0x0224 => 0x0225 // LATIN CAPITAL LETTER Z WITH HOOK + case 0x0226 => 0x0227 // LATIN CAPITAL LETTER A WITH DOT ABOVE + case 0x0228 => 0x0229 // LATIN CAPITAL LETTER E WITH CEDILLA + case 0x022A => 0x022B // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + case 0x022C => 0x022D // LATIN CAPITAL LETTER O WITH TILDE AND MACRON + case 0x022E => 0x022F // LATIN CAPITAL LETTER O WITH DOT ABOVE + case 0x0230 => 0x0231 // LATIN CAPITAL LETTER O WITH 
DOT ABOVE AND MACRON + case 0x0232 => 0x0233 // LATIN CAPITAL LETTER Y WITH MACRON + case 0x023A => 0x2C65 // LATIN CAPITAL LETTER A WITH STROKE + case 0x023B => 0x023C // LATIN CAPITAL LETTER C WITH STROKE + case 0x023D => 0x019A // LATIN CAPITAL LETTER L WITH BAR + case 0x023E => 0x2C66 // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + case 0x0241 => 0x0242 // LATIN CAPITAL LETTER GLOTTAL STOP + case 0x0243 => 0x0180 // LATIN CAPITAL LETTER B WITH STROKE + case 0x0244 => 0x0289 // LATIN CAPITAL LETTER U BAR + case 0x0245 => 0x028C // LATIN CAPITAL LETTER TURNED V + case 0x0246 => 0x0247 // LATIN CAPITAL LETTER E WITH STROKE + case 0x0248 => 0x0249 // LATIN CAPITAL LETTER J WITH STROKE + case 0x024A => 0x024B // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + case 0x024C => 0x024D // LATIN CAPITAL LETTER R WITH STROKE + case 0x024E => 0x024F // LATIN CAPITAL LETTER Y WITH STROKE + case 0x0345 => 0x03B9 // COMBINING GREEK YPOGEGRAMMENI + case 0x0370 => 0x0371 // GREEK CAPITAL LETTER HETA + case 0x0372 => 0x0373 // GREEK CAPITAL LETTER ARCHAIC SAMPI + case 0x0376 => 0x0377 // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + case 0x037F => 0x03F3 // GREEK CAPITAL LETTER YOT + case 0x0386 => 0x03AC // GREEK CAPITAL LETTER ALPHA WITH TONOS + case 0x0388 => 0x03AD // GREEK CAPITAL LETTER EPSILON WITH TONOS + case 0x0389 => 0x03AE // GREEK CAPITAL LETTER ETA WITH TONOS + case 0x038A => 0x03AF // GREEK CAPITAL LETTER IOTA WITH TONOS + case 0x038C => 0x03CC // GREEK CAPITAL LETTER OMICRON WITH TONOS + case 0x038E => 0x03CD // GREEK CAPITAL LETTER UPSILON WITH TONOS + case 0x038F => 0x03CE // GREEK CAPITAL LETTER OMEGA WITH TONOS + case 0x0391 => 0x03B1 // GREEK CAPITAL LETTER ALPHA + case 0x0392 => 0x03B2 // GREEK CAPITAL LETTER BETA + case 0x0393 => 0x03B3 // GREEK CAPITAL LETTER GAMMA + case 0x0394 => 0x03B4 // GREEK CAPITAL LETTER DELTA + case 0x0395 => 0x03B5 // GREEK CAPITAL LETTER EPSILON + case 0x0396 => 0x03B6 // GREEK CAPITAL LETTER ZETA + case 0x0397 => 0x03B7 // GREEK 
CAPITAL LETTER ETA + case 0x0398 => 0x03B8 // GREEK CAPITAL LETTER THETA + case 0x0399 => 0x03B9 // GREEK CAPITAL LETTER IOTA + case 0x039A => 0x03BA // GREEK CAPITAL LETTER KAPPA + case 0x039B => 0x03BB // GREEK CAPITAL LETTER LAMDA + case 0x039C => 0x03BC // GREEK CAPITAL LETTER MU + case 0x039D => 0x03BD // GREEK CAPITAL LETTER NU + case 0x039E => 0x03BE // GREEK CAPITAL LETTER XI + case 0x039F => 0x03BF // GREEK CAPITAL LETTER OMICRON + case 0x03A0 => 0x03C0 // GREEK CAPITAL LETTER PI + case 0x03A1 => 0x03C1 // GREEK CAPITAL LETTER RHO + case 0x03A3 => 0x03C3 // GREEK CAPITAL LETTER SIGMA + case 0x03A4 => 0x03C4 // GREEK CAPITAL LETTER TAU + case 0x03A5 => 0x03C5 // GREEK CAPITAL LETTER UPSILON + case 0x03A6 => 0x03C6 // GREEK CAPITAL LETTER PHI + case 0x03A7 => 0x03C7 // GREEK CAPITAL LETTER CHI + case 0x03A8 => 0x03C8 // GREEK CAPITAL LETTER PSI + case 0x03A9 => 0x03C9 // GREEK CAPITAL LETTER OMEGA + case 0x03AA => 0x03CA // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + case 0x03AB => 0x03CB // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + case 0x03C2 => 0x03C3 // GREEK SMALL LETTER FINAL SIGMA + case 0x03CF => 0x03D7 // GREEK CAPITAL KAI SYMBOL + case 0x03D0 => 0x03B2 // GREEK BETA SYMBOL + case 0x03D1 => 0x03B8 // GREEK THETA SYMBOL + case 0x03D5 => 0x03C6 // GREEK PHI SYMBOL + case 0x03D6 => 0x03C0 // GREEK PI SYMBOL + case 0x03D8 => 0x03D9 // GREEK LETTER ARCHAIC KOPPA + case 0x03DA => 0x03DB // GREEK LETTER STIGMA + case 0x03DC => 0x03DD // GREEK LETTER DIGAMMA + case 0x03DE => 0x03DF // GREEK LETTER KOPPA + case 0x03E0 => 0x03E1 // GREEK LETTER SAMPI + case 0x03E2 => 0x03E3 // COPTIC CAPITAL LETTER SHEI + case 0x03E4 => 0x03E5 // COPTIC CAPITAL LETTER FEI + case 0x03E6 => 0x03E7 // COPTIC CAPITAL LETTER KHEI + case 0x03E8 => 0x03E9 // COPTIC CAPITAL LETTER HORI + case 0x03EA => 0x03EB // COPTIC CAPITAL LETTER GANGIA + case 0x03EC => 0x03ED // COPTIC CAPITAL LETTER SHIMA + case 0x03EE => 0x03EF // COPTIC CAPITAL LETTER DEI + case 0x03F0 => 0x03BA // GREEK 
KAPPA SYMBOL + case 0x03F1 => 0x03C1 // GREEK RHO SYMBOL + case 0x03F4 => 0x03B8 // GREEK CAPITAL THETA SYMBOL + case 0x03F5 => 0x03B5 // GREEK LUNATE EPSILON SYMBOL + case 0x03F7 => 0x03F8 // GREEK CAPITAL LETTER SHO + case 0x03F9 => 0x03F2 // GREEK CAPITAL LUNATE SIGMA SYMBOL + case 0x03FA => 0x03FB // GREEK CAPITAL LETTER SAN + case 0x03FD => 0x037B // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + case 0x03FE => 0x037C // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + case 0x03FF => 0x037D // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + case 0x0400 => 0x0450 // CYRILLIC CAPITAL LETTER IE WITH GRAVE + case 0x0401 => 0x0451 // CYRILLIC CAPITAL LETTER IO + case 0x0402 => 0x0452 // CYRILLIC CAPITAL LETTER DJE + case 0x0403 => 0x0453 // CYRILLIC CAPITAL LETTER GJE + case 0x0404 => 0x0454 // CYRILLIC CAPITAL LETTER UKRAINIAN IE + case 0x0405 => 0x0455 // CYRILLIC CAPITAL LETTER DZE + case 0x0406 => 0x0456 // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + case 0x0407 => 0x0457 // CYRILLIC CAPITAL LETTER YI + case 0x0408 => 0x0458 // CYRILLIC CAPITAL LETTER JE + case 0x0409 => 0x0459 // CYRILLIC CAPITAL LETTER LJE + case 0x040A => 0x045A // CYRILLIC CAPITAL LETTER NJE + case 0x040B => 0x045B // CYRILLIC CAPITAL LETTER TSHE + case 0x040C => 0x045C // CYRILLIC CAPITAL LETTER KJE + case 0x040D => 0x045D // CYRILLIC CAPITAL LETTER I WITH GRAVE + case 0x040E => 0x045E // CYRILLIC CAPITAL LETTER SHORT U + case 0x040F => 0x045F // CYRILLIC CAPITAL LETTER DZHE + case 0x0410 => 0x0430 // CYRILLIC CAPITAL LETTER A + case 0x0411 => 0x0431 // CYRILLIC CAPITAL LETTER BE + case 0x0412 => 0x0432 // CYRILLIC CAPITAL LETTER VE + case 0x0413 => 0x0433 // CYRILLIC CAPITAL LETTER GHE + case 0x0414 => 0x0434 // CYRILLIC CAPITAL LETTER DE + case 0x0415 => 0x0435 // CYRILLIC CAPITAL LETTER IE + case 0x0416 => 0x0436 // CYRILLIC CAPITAL LETTER ZHE + case 0x0417 => 0x0437 // CYRILLIC CAPITAL LETTER ZE + case 0x0418 => 0x0438 // CYRILLIC CAPITAL LETTER I + case 0x0419 => 0x0439 // CYRILLIC 
CAPITAL LETTER SHORT I + case 0x041A => 0x043A // CYRILLIC CAPITAL LETTER KA + case 0x041B => 0x043B // CYRILLIC CAPITAL LETTER EL + case 0x041C => 0x043C // CYRILLIC CAPITAL LETTER EM + case 0x041D => 0x043D // CYRILLIC CAPITAL LETTER EN + case 0x041E => 0x043E // CYRILLIC CAPITAL LETTER O + case 0x041F => 0x043F // CYRILLIC CAPITAL LETTER PE + case 0x0420 => 0x0440 // CYRILLIC CAPITAL LETTER ER + case 0x0421 => 0x0441 // CYRILLIC CAPITAL LETTER ES + case 0x0422 => 0x0442 // CYRILLIC CAPITAL LETTER TE + case 0x0423 => 0x0443 // CYRILLIC CAPITAL LETTER U + case 0x0424 => 0x0444 // CYRILLIC CAPITAL LETTER EF + case 0x0425 => 0x0445 // CYRILLIC CAPITAL LETTER HA + case 0x0426 => 0x0446 // CYRILLIC CAPITAL LETTER TSE + case 0x0427 => 0x0447 // CYRILLIC CAPITAL LETTER CHE + case 0x0428 => 0x0448 // CYRILLIC CAPITAL LETTER SHA + case 0x0429 => 0x0449 // CYRILLIC CAPITAL LETTER SHCHA + case 0x042A => 0x044A // CYRILLIC CAPITAL LETTER HARD SIGN + case 0x042B => 0x044B // CYRILLIC CAPITAL LETTER YERU + case 0x042C => 0x044C // CYRILLIC CAPITAL LETTER SOFT SIGN + case 0x042D => 0x044D // CYRILLIC CAPITAL LETTER E + case 0x042E => 0x044E // CYRILLIC CAPITAL LETTER YU + case 0x042F => 0x044F // CYRILLIC CAPITAL LETTER YA + case 0x0460 => 0x0461 // CYRILLIC CAPITAL LETTER OMEGA + case 0x0462 => 0x0463 // CYRILLIC CAPITAL LETTER YAT + case 0x0464 => 0x0465 // CYRILLIC CAPITAL LETTER IOTIFIED E + case 0x0466 => 0x0467 // CYRILLIC CAPITAL LETTER LITTLE YUS + case 0x0468 => 0x0469 // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS + case 0x046A => 0x046B // CYRILLIC CAPITAL LETTER BIG YUS + case 0x046C => 0x046D // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + case 0x046E => 0x046F // CYRILLIC CAPITAL LETTER KSI + case 0x0470 => 0x0471 // CYRILLIC CAPITAL LETTER PSI + case 0x0472 => 0x0473 // CYRILLIC CAPITAL LETTER FITA + case 0x0474 => 0x0475 // CYRILLIC CAPITAL LETTER IZHITSA + case 0x0476 => 0x0477 // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT + case 0x0478 => 0x0479 // 
CYRILLIC CAPITAL LETTER UK + case 0x047A => 0x047B // CYRILLIC CAPITAL LETTER ROUND OMEGA + case 0x047C => 0x047D // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO + case 0x047E => 0x047F // CYRILLIC CAPITAL LETTER OT + case 0x0480 => 0x0481 // CYRILLIC CAPITAL LETTER KOPPA + case 0x048A => 0x048B // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + case 0x048C => 0x048D // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + case 0x048E => 0x048F // CYRILLIC CAPITAL LETTER ER WITH TICK + case 0x0490 => 0x0491 // CYRILLIC CAPITAL LETTER GHE WITH UPTURN + case 0x0492 => 0x0493 // CYRILLIC CAPITAL LETTER GHE WITH STROKE + case 0x0494 => 0x0495 // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK + case 0x0496 => 0x0497 // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + case 0x0498 => 0x0499 // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + case 0x049A => 0x049B // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + case 0x049C => 0x049D // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + case 0x049E => 0x049F // CYRILLIC CAPITAL LETTER KA WITH STROKE + case 0x04A0 => 0x04A1 // CYRILLIC CAPITAL LETTER BASHKIR KA + case 0x04A2 => 0x04A3 // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + case 0x04A4 => 0x04A5 // CYRILLIC CAPITAL LIGATURE EN GHE + case 0x04A6 => 0x04A7 // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + case 0x04A8 => 0x04A9 // CYRILLIC CAPITAL LETTER ABKHASIAN HA + case 0x04AA => 0x04AB // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + case 0x04AC => 0x04AD // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + case 0x04AE => 0x04AF // CYRILLIC CAPITAL LETTER STRAIGHT U + case 0x04B0 => 0x04B1 // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + case 0x04B2 => 0x04B3 // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + case 0x04B4 => 0x04B5 // CYRILLIC CAPITAL LIGATURE TE TSE + case 0x04B6 => 0x04B7 // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + case 0x04B8 => 0x04B9 // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + case 0x04BA => 0x04BB // CYRILLIC CAPITAL LETTER SHHA + case 0x04BC => 0x04BD // CYRILLIC CAPITAL 
LETTER ABKHASIAN CHE + case 0x04BE => 0x04BF // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + case 0x04C0 => 0x04CF // CYRILLIC LETTER PALOCHKA + case 0x04C1 => 0x04C2 // CYRILLIC CAPITAL LETTER ZHE WITH BREVE + case 0x04C3 => 0x04C4 // CYRILLIC CAPITAL LETTER KA WITH HOOK + case 0x04C5 => 0x04C6 // CYRILLIC CAPITAL LETTER EL WITH TAIL + case 0x04C7 => 0x04C8 // CYRILLIC CAPITAL LETTER EN WITH HOOK + case 0x04C9 => 0x04CA // CYRILLIC CAPITAL LETTER EN WITH TAIL + case 0x04CB => 0x04CC // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + case 0x04CD => 0x04CE // CYRILLIC CAPITAL LETTER EM WITH TAIL + case 0x04D0 => 0x04D1 // CYRILLIC CAPITAL LETTER A WITH BREVE + case 0x04D2 => 0x04D3 // CYRILLIC CAPITAL LETTER A WITH DIAERESIS + case 0x04D4 => 0x04D5 // CYRILLIC CAPITAL LIGATURE A IE + case 0x04D6 => 0x04D7 // CYRILLIC CAPITAL LETTER IE WITH BREVE + case 0x04D8 => 0x04D9 // CYRILLIC CAPITAL LETTER SCHWA + case 0x04DA => 0x04DB // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS + case 0x04DC => 0x04DD // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS + case 0x04DE => 0x04DF // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS + case 0x04E0 => 0x04E1 // CYRILLIC CAPITAL LETTER ABKHASIAN DZE + case 0x04E2 => 0x04E3 // CYRILLIC CAPITAL LETTER I WITH MACRON + case 0x04E4 => 0x04E5 // CYRILLIC CAPITAL LETTER I WITH DIAERESIS + case 0x04E6 => 0x04E7 // CYRILLIC CAPITAL LETTER O WITH DIAERESIS + case 0x04E8 => 0x04E9 // CYRILLIC CAPITAL LETTER BARRED O + case 0x04EA => 0x04EB // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS + case 0x04EC => 0x04ED // CYRILLIC CAPITAL LETTER E WITH DIAERESIS + case 0x04EE => 0x04EF // CYRILLIC CAPITAL LETTER U WITH MACRON + case 0x04F0 => 0x04F1 // CYRILLIC CAPITAL LETTER U WITH DIAERESIS + case 0x04F2 => 0x04F3 // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x04F4 => 0x04F5 // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS + case 0x04F6 => 0x04F7 // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + case 0x04F8 => 0x04F9 // CYRILLIC CAPITAL LETTER YERU 
WITH DIAERESIS + case 0x04FA => 0x04FB // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + case 0x04FC => 0x04FD // CYRILLIC CAPITAL LETTER HA WITH HOOK + case 0x04FE => 0x04FF // CYRILLIC CAPITAL LETTER HA WITH STROKE + case 0x0500 => 0x0501 // CYRILLIC CAPITAL LETTER KOMI DE + case 0x0502 => 0x0503 // CYRILLIC CAPITAL LETTER KOMI DJE + case 0x0504 => 0x0505 // CYRILLIC CAPITAL LETTER KOMI ZJE + case 0x0506 => 0x0507 // CYRILLIC CAPITAL LETTER KOMI DZJE + case 0x0508 => 0x0509 // CYRILLIC CAPITAL LETTER KOMI LJE + case 0x050A => 0x050B // CYRILLIC CAPITAL LETTER KOMI NJE + case 0x050C => 0x050D // CYRILLIC CAPITAL LETTER KOMI SJE + case 0x050E => 0x050F // CYRILLIC CAPITAL LETTER KOMI TJE + case 0x0510 => 0x0511 // CYRILLIC CAPITAL LETTER REVERSED ZE + case 0x0512 => 0x0513 // CYRILLIC CAPITAL LETTER EL WITH HOOK + case 0x0514 => 0x0515 // CYRILLIC CAPITAL LETTER LHA + case 0x0516 => 0x0517 // CYRILLIC CAPITAL LETTER RHA + case 0x0518 => 0x0519 // CYRILLIC CAPITAL LETTER YAE + case 0x051A => 0x051B // CYRILLIC CAPITAL LETTER QA + case 0x051C => 0x051D // CYRILLIC CAPITAL LETTER WE + case 0x051E => 0x051F // CYRILLIC CAPITAL LETTER ALEUT KA + case 0x0520 => 0x0521 // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK + case 0x0522 => 0x0523 // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK + case 0x0524 => 0x0525 // CYRILLIC CAPITAL LETTER PE WITH DESCENDER + case 0x0526 => 0x0527 // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER + case 0x0528 => 0x0529 // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK + case 0x052A => 0x052B // CYRILLIC CAPITAL LETTER DZZHE + case 0x052C => 0x052D // CYRILLIC CAPITAL LETTER DCHE + case 0x052E => 0x052F // CYRILLIC CAPITAL LETTER EL WITH DESCENDER + case 0x0531 => 0x0561 // ARMENIAN CAPITAL LETTER AYB + case 0x0532 => 0x0562 // ARMENIAN CAPITAL LETTER BEN + case 0x0533 => 0x0563 // ARMENIAN CAPITAL LETTER GIM + case 0x0534 => 0x0564 // ARMENIAN CAPITAL LETTER DA + case 0x0535 => 0x0565 // ARMENIAN CAPITAL LETTER ECH + case 0x0536 => 0x0566 // 
ARMENIAN CAPITAL LETTER ZA + case 0x0537 => 0x0567 // ARMENIAN CAPITAL LETTER EH + case 0x0538 => 0x0568 // ARMENIAN CAPITAL LETTER ET + case 0x0539 => 0x0569 // ARMENIAN CAPITAL LETTER TO + case 0x053A => 0x056A // ARMENIAN CAPITAL LETTER ZHE + case 0x053B => 0x056B // ARMENIAN CAPITAL LETTER INI + case 0x053C => 0x056C // ARMENIAN CAPITAL LETTER LIWN + case 0x053D => 0x056D // ARMENIAN CAPITAL LETTER XEH + case 0x053E => 0x056E // ARMENIAN CAPITAL LETTER CA + case 0x053F => 0x056F // ARMENIAN CAPITAL LETTER KEN + case 0x0540 => 0x0570 // ARMENIAN CAPITAL LETTER HO + case 0x0541 => 0x0571 // ARMENIAN CAPITAL LETTER JA + case 0x0542 => 0x0572 // ARMENIAN CAPITAL LETTER GHAD + case 0x0543 => 0x0573 // ARMENIAN CAPITAL LETTER CHEH + case 0x0544 => 0x0574 // ARMENIAN CAPITAL LETTER MEN + case 0x0545 => 0x0575 // ARMENIAN CAPITAL LETTER YI + case 0x0546 => 0x0576 // ARMENIAN CAPITAL LETTER NOW + case 0x0547 => 0x0577 // ARMENIAN CAPITAL LETTER SHA + case 0x0548 => 0x0578 // ARMENIAN CAPITAL LETTER VO + case 0x0549 => 0x0579 // ARMENIAN CAPITAL LETTER CHA + case 0x054A => 0x057A // ARMENIAN CAPITAL LETTER PEH + case 0x054B => 0x057B // ARMENIAN CAPITAL LETTER JHEH + case 0x054C => 0x057C // ARMENIAN CAPITAL LETTER RA + case 0x054D => 0x057D // ARMENIAN CAPITAL LETTER SEH + case 0x054E => 0x057E // ARMENIAN CAPITAL LETTER VEW + case 0x054F => 0x057F // ARMENIAN CAPITAL LETTER TIWN + case 0x0550 => 0x0580 // ARMENIAN CAPITAL LETTER REH + case 0x0551 => 0x0581 // ARMENIAN CAPITAL LETTER CO + case 0x0552 => 0x0582 // ARMENIAN CAPITAL LETTER YIWN + case 0x0553 => 0x0583 // ARMENIAN CAPITAL LETTER PIWR + case 0x0554 => 0x0584 // ARMENIAN CAPITAL LETTER KEH + case 0x0555 => 0x0585 // ARMENIAN CAPITAL LETTER OH + case 0x0556 => 0x0586 // ARMENIAN CAPITAL LETTER FEH + case 0x10A0 => 0x2D00 // GEORGIAN CAPITAL LETTER AN + case 0x10A1 => 0x2D01 // GEORGIAN CAPITAL LETTER BAN + case 0x10A2 => 0x2D02 // GEORGIAN CAPITAL LETTER GAN + case 0x10A3 => 0x2D03 // GEORGIAN CAPITAL LETTER 
DON + case 0x10A4 => 0x2D04 // GEORGIAN CAPITAL LETTER EN + case 0x10A5 => 0x2D05 // GEORGIAN CAPITAL LETTER VIN + case 0x10A6 => 0x2D06 // GEORGIAN CAPITAL LETTER ZEN + case 0x10A7 => 0x2D07 // GEORGIAN CAPITAL LETTER TAN + case 0x10A8 => 0x2D08 // GEORGIAN CAPITAL LETTER IN + case 0x10A9 => 0x2D09 // GEORGIAN CAPITAL LETTER KAN + case 0x10AA => 0x2D0A // GEORGIAN CAPITAL LETTER LAS + case 0x10AB => 0x2D0B // GEORGIAN CAPITAL LETTER MAN + case 0x10AC => 0x2D0C // GEORGIAN CAPITAL LETTER NAR + case 0x10AD => 0x2D0D // GEORGIAN CAPITAL LETTER ON + case 0x10AE => 0x2D0E // GEORGIAN CAPITAL LETTER PAR + case 0x10AF => 0x2D0F // GEORGIAN CAPITAL LETTER ZHAR + case 0x10B0 => 0x2D10 // GEORGIAN CAPITAL LETTER RAE + case 0x10B1 => 0x2D11 // GEORGIAN CAPITAL LETTER SAN + case 0x10B2 => 0x2D12 // GEORGIAN CAPITAL LETTER TAR + case 0x10B3 => 0x2D13 // GEORGIAN CAPITAL LETTER UN + case 0x10B4 => 0x2D14 // GEORGIAN CAPITAL LETTER PHAR + case 0x10B5 => 0x2D15 // GEORGIAN CAPITAL LETTER KHAR + case 0x10B6 => 0x2D16 // GEORGIAN CAPITAL LETTER GHAN + case 0x10B7 => 0x2D17 // GEORGIAN CAPITAL LETTER QAR + case 0x10B8 => 0x2D18 // GEORGIAN CAPITAL LETTER SHIN + case 0x10B9 => 0x2D19 // GEORGIAN CAPITAL LETTER CHIN + case 0x10BA => 0x2D1A // GEORGIAN CAPITAL LETTER CAN + case 0x10BB => 0x2D1B // GEORGIAN CAPITAL LETTER JIL + case 0x10BC => 0x2D1C // GEORGIAN CAPITAL LETTER CIL + case 0x10BD => 0x2D1D // GEORGIAN CAPITAL LETTER CHAR + case 0x10BE => 0x2D1E // GEORGIAN CAPITAL LETTER XAN + case 0x10BF => 0x2D1F // GEORGIAN CAPITAL LETTER JHAN + case 0x10C0 => 0x2D20 // GEORGIAN CAPITAL LETTER HAE + case 0x10C1 => 0x2D21 // GEORGIAN CAPITAL LETTER HE + case 0x10C2 => 0x2D22 // GEORGIAN CAPITAL LETTER HIE + case 0x10C3 => 0x2D23 // GEORGIAN CAPITAL LETTER WE + case 0x10C4 => 0x2D24 // GEORGIAN CAPITAL LETTER HAR + case 0x10C5 => 0x2D25 // GEORGIAN CAPITAL LETTER HOE + case 0x10C7 => 0x2D27 // GEORGIAN CAPITAL LETTER YN + case 0x10CD => 0x2D2D // GEORGIAN CAPITAL LETTER AEN + case 0x13F8 
=> 0x13F0 // CHEROKEE SMALL LETTER YE + case 0x13F9 => 0x13F1 // CHEROKEE SMALL LETTER YI + case 0x13FA => 0x13F2 // CHEROKEE SMALL LETTER YO + case 0x13FB => 0x13F3 // CHEROKEE SMALL LETTER YU + case 0x13FC => 0x13F4 // CHEROKEE SMALL LETTER YV + case 0x13FD => 0x13F5 // CHEROKEE SMALL LETTER MV + case 0x1C80 => 0x0432 // CYRILLIC SMALL LETTER ROUNDED VE + case 0x1C81 => 0x0434 // CYRILLIC SMALL LETTER LONG-LEGGED DE + case 0x1C82 => 0x043E // CYRILLIC SMALL LETTER NARROW O + case 0x1C83 => 0x0441 // CYRILLIC SMALL LETTER WIDE ES + case 0x1C84 => 0x0442 // CYRILLIC SMALL LETTER TALL TE + case 0x1C85 => 0x0442 // CYRILLIC SMALL LETTER THREE-LEGGED TE + case 0x1C86 => 0x044A // CYRILLIC SMALL LETTER TALL HARD SIGN + case 0x1C87 => 0x0463 // CYRILLIC SMALL LETTER TALL YAT + case 0x1C88 => 0xA64B // CYRILLIC SMALL LETTER UNBLENDED UK + case 0x1C90 => 0x10D0 // GEORGIAN MTAVRULI CAPITAL LETTER AN + case 0x1C91 => 0x10D1 // GEORGIAN MTAVRULI CAPITAL LETTER BAN + case 0x1C92 => 0x10D2 // GEORGIAN MTAVRULI CAPITAL LETTER GAN + case 0x1C93 => 0x10D3 // GEORGIAN MTAVRULI CAPITAL LETTER DON + case 0x1C94 => 0x10D4 // GEORGIAN MTAVRULI CAPITAL LETTER EN + case 0x1C95 => 0x10D5 // GEORGIAN MTAVRULI CAPITAL LETTER VIN + case 0x1C96 => 0x10D6 // GEORGIAN MTAVRULI CAPITAL LETTER ZEN + case 0x1C97 => 0x10D7 // GEORGIAN MTAVRULI CAPITAL LETTER TAN + case 0x1C98 => 0x10D8 // GEORGIAN MTAVRULI CAPITAL LETTER IN + case 0x1C99 => 0x10D9 // GEORGIAN MTAVRULI CAPITAL LETTER KAN + case 0x1C9A => 0x10DA // GEORGIAN MTAVRULI CAPITAL LETTER LAS + case 0x1C9B => 0x10DB // GEORGIAN MTAVRULI CAPITAL LETTER MAN + case 0x1C9C => 0x10DC // GEORGIAN MTAVRULI CAPITAL LETTER NAR + case 0x1C9D => 0x10DD // GEORGIAN MTAVRULI CAPITAL LETTER ON + case 0x1C9E => 0x10DE // GEORGIAN MTAVRULI CAPITAL LETTER PAR + case 0x1C9F => 0x10DF // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR + case 0x1CA0 => 0x10E0 // GEORGIAN MTAVRULI CAPITAL LETTER RAE + case 0x1CA1 => 0x10E1 // GEORGIAN MTAVRULI CAPITAL LETTER SAN + case 
0x1CA2 => 0x10E2 // GEORGIAN MTAVRULI CAPITAL LETTER TAR + case 0x1CA3 => 0x10E3 // GEORGIAN MTAVRULI CAPITAL LETTER UN + case 0x1CA4 => 0x10E4 // GEORGIAN MTAVRULI CAPITAL LETTER PHAR + case 0x1CA5 => 0x10E5 // GEORGIAN MTAVRULI CAPITAL LETTER KHAR + case 0x1CA6 => 0x10E6 // GEORGIAN MTAVRULI CAPITAL LETTER GHAN + case 0x1CA7 => 0x10E7 // GEORGIAN MTAVRULI CAPITAL LETTER QAR + case 0x1CA8 => 0x10E8 // GEORGIAN MTAVRULI CAPITAL LETTER SHIN + case 0x1CA9 => 0x10E9 // GEORGIAN MTAVRULI CAPITAL LETTER CHIN + case 0x1CAA => 0x10EA // GEORGIAN MTAVRULI CAPITAL LETTER CAN + case 0x1CAB => 0x10EB // GEORGIAN MTAVRULI CAPITAL LETTER JIL + case 0x1CAC => 0x10EC // GEORGIAN MTAVRULI CAPITAL LETTER CIL + case 0x1CAD => 0x10ED // GEORGIAN MTAVRULI CAPITAL LETTER CHAR + case 0x1CAE => 0x10EE // GEORGIAN MTAVRULI CAPITAL LETTER XAN + case 0x1CAF => 0x10EF // GEORGIAN MTAVRULI CAPITAL LETTER JHAN + case 0x1CB0 => 0x10F0 // GEORGIAN MTAVRULI CAPITAL LETTER HAE + case 0x1CB1 => 0x10F1 // GEORGIAN MTAVRULI CAPITAL LETTER HE + case 0x1CB2 => 0x10F2 // GEORGIAN MTAVRULI CAPITAL LETTER HIE + case 0x1CB3 => 0x10F3 // GEORGIAN MTAVRULI CAPITAL LETTER WE + case 0x1CB4 => 0x10F4 // GEORGIAN MTAVRULI CAPITAL LETTER HAR + case 0x1CB5 => 0x10F5 // GEORGIAN MTAVRULI CAPITAL LETTER HOE + case 0x1CB6 => 0x10F6 // GEORGIAN MTAVRULI CAPITAL LETTER FI + case 0x1CB7 => 0x10F7 // GEORGIAN MTAVRULI CAPITAL LETTER YN + case 0x1CB8 => 0x10F8 // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI + case 0x1CB9 => 0x10F9 // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN + case 0x1CBA => 0x10FA // GEORGIAN MTAVRULI CAPITAL LETTER AIN + case 0x1CBD => 0x10FD // GEORGIAN MTAVRULI CAPITAL LETTER AEN + case 0x1CBE => 0x10FE // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN + case 0x1CBF => 0x10FF // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN + case 0x1E00 => 0x1E01 // LATIN CAPITAL LETTER A WITH RING BELOW + case 0x1E02 => 0x1E03 // LATIN CAPITAL LETTER B WITH DOT ABOVE + case 0x1E04 => 0x1E05 // LATIN CAPITAL LETTER B WITH DOT 
BELOW + case 0x1E06 => 0x1E07 // LATIN CAPITAL LETTER B WITH LINE BELOW + case 0x1E08 => 0x1E09 // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + case 0x1E0A => 0x1E0B // LATIN CAPITAL LETTER D WITH DOT ABOVE + case 0x1E0C => 0x1E0D // LATIN CAPITAL LETTER D WITH DOT BELOW + case 0x1E0E => 0x1E0F // LATIN CAPITAL LETTER D WITH LINE BELOW + case 0x1E10 => 0x1E11 // LATIN CAPITAL LETTER D WITH CEDILLA + case 0x1E12 => 0x1E13 // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + case 0x1E14 => 0x1E15 // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + case 0x1E16 => 0x1E17 // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + case 0x1E18 => 0x1E19 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + case 0x1E1A => 0x1E1B // LATIN CAPITAL LETTER E WITH TILDE BELOW + case 0x1E1C => 0x1E1D // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + case 0x1E1E => 0x1E1F // LATIN CAPITAL LETTER F WITH DOT ABOVE + case 0x1E20 => 0x1E21 // LATIN CAPITAL LETTER G WITH MACRON + case 0x1E22 => 0x1E23 // LATIN CAPITAL LETTER H WITH DOT ABOVE + case 0x1E24 => 0x1E25 // LATIN CAPITAL LETTER H WITH DOT BELOW + case 0x1E26 => 0x1E27 // LATIN CAPITAL LETTER H WITH DIAERESIS + case 0x1E28 => 0x1E29 // LATIN CAPITAL LETTER H WITH CEDILLA + case 0x1E2A => 0x1E2B // LATIN CAPITAL LETTER H WITH BREVE BELOW + case 0x1E2C => 0x1E2D // LATIN CAPITAL LETTER I WITH TILDE BELOW + case 0x1E2E => 0x1E2F // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + case 0x1E30 => 0x1E31 // LATIN CAPITAL LETTER K WITH ACUTE + case 0x1E32 => 0x1E33 // LATIN CAPITAL LETTER K WITH DOT BELOW + case 0x1E34 => 0x1E35 // LATIN CAPITAL LETTER K WITH LINE BELOW + case 0x1E36 => 0x1E37 // LATIN CAPITAL LETTER L WITH DOT BELOW + case 0x1E38 => 0x1E39 // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + case 0x1E3A => 0x1E3B // LATIN CAPITAL LETTER L WITH LINE BELOW + case 0x1E3C => 0x1E3D // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + case 0x1E3E => 0x1E3F // LATIN CAPITAL LETTER M WITH ACUTE + case 0x1E40 => 0x1E41 // LATIN CAPITAL 
LETTER M WITH DOT ABOVE + case 0x1E42 => 0x1E43 // LATIN CAPITAL LETTER M WITH DOT BELOW + case 0x1E44 => 0x1E45 // LATIN CAPITAL LETTER N WITH DOT ABOVE + case 0x1E46 => 0x1E47 // LATIN CAPITAL LETTER N WITH DOT BELOW + case 0x1E48 => 0x1E49 // LATIN CAPITAL LETTER N WITH LINE BELOW + case 0x1E4A => 0x1E4B // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + case 0x1E4C => 0x1E4D // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + case 0x1E4E => 0x1E4F // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + case 0x1E50 => 0x1E51 // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + case 0x1E52 => 0x1E53 // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + case 0x1E54 => 0x1E55 // LATIN CAPITAL LETTER P WITH ACUTE + case 0x1E56 => 0x1E57 // LATIN CAPITAL LETTER P WITH DOT ABOVE + case 0x1E58 => 0x1E59 // LATIN CAPITAL LETTER R WITH DOT ABOVE + case 0x1E5A => 0x1E5B // LATIN CAPITAL LETTER R WITH DOT BELOW + case 0x1E5C => 0x1E5D // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + case 0x1E5E => 0x1E5F // LATIN CAPITAL LETTER R WITH LINE BELOW + case 0x1E60 => 0x1E61 // LATIN CAPITAL LETTER S WITH DOT ABOVE + case 0x1E62 => 0x1E63 // LATIN CAPITAL LETTER S WITH DOT BELOW + case 0x1E64 => 0x1E65 // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1E66 => 0x1E67 // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + case 0x1E68 => 0x1E69 // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0x1E6A => 0x1E6B // LATIN CAPITAL LETTER T WITH DOT ABOVE + case 0x1E6C => 0x1E6D // LATIN CAPITAL LETTER T WITH DOT BELOW + case 0x1E6E => 0x1E6F // LATIN CAPITAL LETTER T WITH LINE BELOW + case 0x1E70 => 0x1E71 // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + case 0x1E72 => 0x1E73 // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + case 0x1E74 => 0x1E75 // LATIN CAPITAL LETTER U WITH TILDE BELOW + case 0x1E76 => 0x1E77 // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + case 0x1E78 => 0x1E79 // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + case 0x1E7A => 0x1E7B // LATIN CAPITAL 
LETTER U WITH MACRON AND DIAERESIS + case 0x1E7C => 0x1E7D // LATIN CAPITAL LETTER V WITH TILDE + case 0x1E7E => 0x1E7F // LATIN CAPITAL LETTER V WITH DOT BELOW + case 0x1E80 => 0x1E81 // LATIN CAPITAL LETTER W WITH GRAVE + case 0x1E82 => 0x1E83 // LATIN CAPITAL LETTER W WITH ACUTE + case 0x1E84 => 0x1E85 // LATIN CAPITAL LETTER W WITH DIAERESIS + case 0x1E86 => 0x1E87 // LATIN CAPITAL LETTER W WITH DOT ABOVE + case 0x1E88 => 0x1E89 // LATIN CAPITAL LETTER W WITH DOT BELOW + case 0x1E8A => 0x1E8B // LATIN CAPITAL LETTER X WITH DOT ABOVE + case 0x1E8C => 0x1E8D // LATIN CAPITAL LETTER X WITH DIAERESIS + case 0x1E8E => 0x1E8F // LATIN CAPITAL LETTER Y WITH DOT ABOVE + case 0x1E90 => 0x1E91 // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + case 0x1E92 => 0x1E93 // LATIN CAPITAL LETTER Z WITH DOT BELOW + case 0x1E94 => 0x1E95 // LATIN CAPITAL LETTER Z WITH LINE BELOW + case 0x1E9B => 0x1E61 // LATIN SMALL LETTER LONG S WITH DOT ABOVE + case 0x1EA0 => 0x1EA1 // LATIN CAPITAL LETTER A WITH DOT BELOW + case 0x1EA2 => 0x1EA3 // LATIN CAPITAL LETTER A WITH HOOK ABOVE + case 0x1EA4 => 0x1EA5 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1EA6 => 0x1EA7 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1EA8 => 0x1EA9 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EAA => 0x1EAB // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1EAC => 0x1EAD // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1EAE => 0x1EAF // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + case 0x1EB0 => 0x1EB1 // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + case 0x1EB2 => 0x1EB3 // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1EB4 => 0x1EB5 // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + case 0x1EB6 => 0x1EB7 // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + case 0x1EB8 => 0x1EB9 // LATIN CAPITAL LETTER E WITH DOT BELOW + case 0x1EBA => 0x1EBB // LATIN CAPITAL LETTER E WITH HOOK ABOVE + case 0x1EBC => 0x1EBD // LATIN CAPITAL 
LETTER E WITH TILDE + case 0x1EBE => 0x1EBF // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1EC0 => 0x1EC1 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1EC2 => 0x1EC3 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1EC4 => 0x1EC5 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1EC6 => 0x1EC7 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0x1EC8 => 0x1EC9 // LATIN CAPITAL LETTER I WITH HOOK ABOVE + case 0x1ECA => 0x1ECB // LATIN CAPITAL LETTER I WITH DOT BELOW + case 0x1ECC => 0x1ECD // LATIN CAPITAL LETTER O WITH DOT BELOW + case 0x1ECE => 0x1ECF // LATIN CAPITAL LETTER O WITH HOOK ABOVE + case 0x1ED0 => 0x1ED1 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ED2 => 0x1ED3 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ED4 => 0x1ED5 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ED6 => 0x1ED7 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ED8 => 0x1ED9 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1EDA => 0x1EDB // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + case 0x1EDC => 0x1EDD // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + case 0x1EDE => 0x1EDF // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + case 0x1EE0 => 0x1EE1 // LATIN CAPITAL LETTER O WITH HORN AND TILDE + case 0x1EE2 => 0x1EE3 // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + case 0x1EE4 => 0x1EE5 // LATIN CAPITAL LETTER U WITH DOT BELOW + case 0x1EE6 => 0x1EE7 // LATIN CAPITAL LETTER U WITH HOOK ABOVE + case 0x1EE8 => 0x1EE9 // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + case 0x1EEA => 0x1EEB // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + case 0x1EEC => 0x1EED // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + case 0x1EEE => 0x1EEF // LATIN CAPITAL LETTER U WITH HORN AND TILDE + case 0x1EF0 => 0x1EF1 // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + case 0x1EF2 => 0x1EF3 // LATIN CAPITAL LETTER Y WITH GRAVE + case 0x1EF4 => 0x1EF5 // 
LATIN CAPITAL LETTER Y WITH DOT BELOW + case 0x1EF6 => 0x1EF7 // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + case 0x1EF8 => 0x1EF9 // LATIN CAPITAL LETTER Y WITH TILDE + case 0x1EFA => 0x1EFB // LATIN CAPITAL LETTER MIDDLE-WELSH LL + case 0x1EFC => 0x1EFD // LATIN CAPITAL LETTER MIDDLE-WELSH V + case 0x1EFE => 0x1EFF // LATIN CAPITAL LETTER Y WITH LOOP + case 0x1F08 => 0x1F00 // GREEK CAPITAL LETTER ALPHA WITH PSILI + case 0x1F09 => 0x1F01 // GREEK CAPITAL LETTER ALPHA WITH DASIA + case 0x1F0A => 0x1F02 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + case 0x1F0B => 0x1F03 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + case 0x1F0C => 0x1F04 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + case 0x1F0D => 0x1F05 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + case 0x1F0E => 0x1F06 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + case 0x1F0F => 0x1F07 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + case 0x1F18 => 0x1F10 // GREEK CAPITAL LETTER EPSILON WITH PSILI + case 0x1F19 => 0x1F11 // GREEK CAPITAL LETTER EPSILON WITH DASIA + case 0x1F1A => 0x1F12 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA + case 0x1F1B => 0x1F13 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + case 0x1F1C => 0x1F14 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + case 0x1F1D => 0x1F15 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + case 0x1F28 => 0x1F20 // GREEK CAPITAL LETTER ETA WITH PSILI + case 0x1F29 => 0x1F21 // GREEK CAPITAL LETTER ETA WITH DASIA + case 0x1F2A => 0x1F22 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + case 0x1F2B => 0x1F23 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + case 0x1F2C => 0x1F24 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + case 0x1F2D => 0x1F25 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + case 0x1F2E => 0x1F26 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + case 0x1F2F => 0x1F27 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + case 0x1F38 => 0x1F30 // GREEK CAPITAL 
LETTER IOTA WITH PSILI + case 0x1F39 => 0x1F31 // GREEK CAPITAL LETTER IOTA WITH DASIA + case 0x1F3A => 0x1F32 // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + case 0x1F3B => 0x1F33 // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + case 0x1F3C => 0x1F34 // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + case 0x1F3D => 0x1F35 // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + case 0x1F3E => 0x1F36 // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + case 0x1F3F => 0x1F37 // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + case 0x1F48 => 0x1F40 // GREEK CAPITAL LETTER OMICRON WITH PSILI + case 0x1F49 => 0x1F41 // GREEK CAPITAL LETTER OMICRON WITH DASIA + case 0x1F4A => 0x1F42 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + case 0x1F4B => 0x1F43 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + case 0x1F4C => 0x1F44 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + case 0x1F4D => 0x1F45 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + case 0x1F59 => 0x1F51 // GREEK CAPITAL LETTER UPSILON WITH DASIA + case 0x1F5B => 0x1F53 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + case 0x1F5D => 0x1F55 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + case 0x1F5F => 0x1F57 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + case 0x1F68 => 0x1F60 // GREEK CAPITAL LETTER OMEGA WITH PSILI + case 0x1F69 => 0x1F61 // GREEK CAPITAL LETTER OMEGA WITH DASIA + case 0x1F6A => 0x1F62 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + case 0x1F6B => 0x1F63 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + case 0x1F6C => 0x1F64 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + case 0x1F6D => 0x1F65 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + case 0x1F6E => 0x1F66 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + case 0x1F6F => 0x1F67 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + case 0x1FB8 => 0x1FB0 // GREEK CAPITAL LETTER ALPHA WITH VRACHY + case 0x1FB9 => 0x1FB1 // GREEK CAPITAL LETTER ALPHA WITH 
MACRON + case 0x1FBA => 0x1F70 // GREEK CAPITAL LETTER ALPHA WITH VARIA + case 0x1FBB => 0x1F71 // GREEK CAPITAL LETTER ALPHA WITH OXIA + case 0x1FBE => 0x03B9 // GREEK PROSGEGRAMMENI + case 0x1FC8 => 0x1F72 // GREEK CAPITAL LETTER EPSILON WITH VARIA + case 0x1FC9 => 0x1F73 // GREEK CAPITAL LETTER EPSILON WITH OXIA + case 0x1FCA => 0x1F74 // GREEK CAPITAL LETTER ETA WITH VARIA + case 0x1FCB => 0x1F75 // GREEK CAPITAL LETTER ETA WITH OXIA + case 0x1FD8 => 0x1FD0 // GREEK CAPITAL LETTER IOTA WITH VRACHY + case 0x1FD9 => 0x1FD1 // GREEK CAPITAL LETTER IOTA WITH MACRON + case 0x1FDA => 0x1F76 // GREEK CAPITAL LETTER IOTA WITH VARIA + case 0x1FDB => 0x1F77 // GREEK CAPITAL LETTER IOTA WITH OXIA + case 0x1FE8 => 0x1FE0 // GREEK CAPITAL LETTER UPSILON WITH VRACHY + case 0x1FE9 => 0x1FE1 // GREEK CAPITAL LETTER UPSILON WITH MACRON + case 0x1FEA => 0x1F7A // GREEK CAPITAL LETTER UPSILON WITH VARIA + case 0x1FEB => 0x1F7B // GREEK CAPITAL LETTER UPSILON WITH OXIA + case 0x1FEC => 0x1FE5 // GREEK CAPITAL LETTER RHO WITH DASIA + case 0x1FF8 => 0x1F78 // GREEK CAPITAL LETTER OMICRON WITH VARIA + case 0x1FF9 => 0x1F79 // GREEK CAPITAL LETTER OMICRON WITH OXIA + case 0x1FFA => 0x1F7C // GREEK CAPITAL LETTER OMEGA WITH VARIA + case 0x1FFB => 0x1F7D // GREEK CAPITAL LETTER OMEGA WITH OXIA + case 0x2126 => 0x03C9 // OHM SIGN + case 0x212A => 0x006B // KELVIN SIGN + case 0x212B => 0x00E5 // ANGSTROM SIGN + case 0x2132 => 0x214E // TURNED CAPITAL F + case 0x2160 => 0x2170 // ROMAN NUMERAL ONE + case 0x2161 => 0x2171 // ROMAN NUMERAL TWO + case 0x2162 => 0x2172 // ROMAN NUMERAL THREE + case 0x2163 => 0x2173 // ROMAN NUMERAL FOUR + case 0x2164 => 0x2174 // ROMAN NUMERAL FIVE + case 0x2165 => 0x2175 // ROMAN NUMERAL SIX + case 0x2166 => 0x2176 // ROMAN NUMERAL SEVEN + case 0x2167 => 0x2177 // ROMAN NUMERAL EIGHT + case 0x2168 => 0x2178 // ROMAN NUMERAL NINE + case 0x2169 => 0x2179 // ROMAN NUMERAL TEN + case 0x216A => 0x217A // ROMAN NUMERAL ELEVEN + case 0x216B => 0x217B // ROMAN 
NUMERAL TWELVE + case 0x216C => 0x217C // ROMAN NUMERAL FIFTY + case 0x216D => 0x217D // ROMAN NUMERAL ONE HUNDRED + case 0x216E => 0x217E // ROMAN NUMERAL FIVE HUNDRED + case 0x216F => 0x217F // ROMAN NUMERAL ONE THOUSAND + case 0x2183 => 0x2184 // ROMAN NUMERAL REVERSED ONE HUNDRED + case 0x24B6 => 0x24D0 // CIRCLED LATIN CAPITAL LETTER A + case 0x24B7 => 0x24D1 // CIRCLED LATIN CAPITAL LETTER B + case 0x24B8 => 0x24D2 // CIRCLED LATIN CAPITAL LETTER C + case 0x24B9 => 0x24D3 // CIRCLED LATIN CAPITAL LETTER D + case 0x24BA => 0x24D4 // CIRCLED LATIN CAPITAL LETTER E + case 0x24BB => 0x24D5 // CIRCLED LATIN CAPITAL LETTER F + case 0x24BC => 0x24D6 // CIRCLED LATIN CAPITAL LETTER G + case 0x24BD => 0x24D7 // CIRCLED LATIN CAPITAL LETTER H + case 0x24BE => 0x24D8 // CIRCLED LATIN CAPITAL LETTER I + case 0x24BF => 0x24D9 // CIRCLED LATIN CAPITAL LETTER J + case 0x24C0 => 0x24DA // CIRCLED LATIN CAPITAL LETTER K + case 0x24C1 => 0x24DB // CIRCLED LATIN CAPITAL LETTER L + case 0x24C2 => 0x24DC // CIRCLED LATIN CAPITAL LETTER M + case 0x24C3 => 0x24DD // CIRCLED LATIN CAPITAL LETTER N + case 0x24C4 => 0x24DE // CIRCLED LATIN CAPITAL LETTER O + case 0x24C5 => 0x24DF // CIRCLED LATIN CAPITAL LETTER P + case 0x24C6 => 0x24E0 // CIRCLED LATIN CAPITAL LETTER Q + case 0x24C7 => 0x24E1 // CIRCLED LATIN CAPITAL LETTER R + case 0x24C8 => 0x24E2 // CIRCLED LATIN CAPITAL LETTER S + case 0x24C9 => 0x24E3 // CIRCLED LATIN CAPITAL LETTER T + case 0x24CA => 0x24E4 // CIRCLED LATIN CAPITAL LETTER U + case 0x24CB => 0x24E5 // CIRCLED LATIN CAPITAL LETTER V + case 0x24CC => 0x24E6 // CIRCLED LATIN CAPITAL LETTER W + case 0x24CD => 0x24E7 // CIRCLED LATIN CAPITAL LETTER X + case 0x24CE => 0x24E8 // CIRCLED LATIN CAPITAL LETTER Y + case 0x24CF => 0x24E9 // CIRCLED LATIN CAPITAL LETTER Z + case 0x2C00 => 0x2C30 // GLAGOLITIC CAPITAL LETTER AZU + case 0x2C01 => 0x2C31 // GLAGOLITIC CAPITAL LETTER BUKY + case 0x2C02 => 0x2C32 // GLAGOLITIC CAPITAL LETTER VEDE + case 0x2C03 => 0x2C33 // 
GLAGOLITIC CAPITAL LETTER GLAGOLI + case 0x2C04 => 0x2C34 // GLAGOLITIC CAPITAL LETTER DOBRO + case 0x2C05 => 0x2C35 // GLAGOLITIC CAPITAL LETTER YESTU + case 0x2C06 => 0x2C36 // GLAGOLITIC CAPITAL LETTER ZHIVETE + case 0x2C07 => 0x2C37 // GLAGOLITIC CAPITAL LETTER DZELO + case 0x2C08 => 0x2C38 // GLAGOLITIC CAPITAL LETTER ZEMLJA + case 0x2C09 => 0x2C39 // GLAGOLITIC CAPITAL LETTER IZHE + case 0x2C0A => 0x2C3A // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + case 0x2C0B => 0x2C3B // GLAGOLITIC CAPITAL LETTER I + case 0x2C0C => 0x2C3C // GLAGOLITIC CAPITAL LETTER DJERVI + case 0x2C0D => 0x2C3D // GLAGOLITIC CAPITAL LETTER KAKO + case 0x2C0E => 0x2C3E // GLAGOLITIC CAPITAL LETTER LJUDIJE + case 0x2C0F => 0x2C3F // GLAGOLITIC CAPITAL LETTER MYSLITE + case 0x2C10 => 0x2C40 // GLAGOLITIC CAPITAL LETTER NASHI + case 0x2C11 => 0x2C41 // GLAGOLITIC CAPITAL LETTER ONU + case 0x2C12 => 0x2C42 // GLAGOLITIC CAPITAL LETTER POKOJI + case 0x2C13 => 0x2C43 // GLAGOLITIC CAPITAL LETTER RITSI + case 0x2C14 => 0x2C44 // GLAGOLITIC CAPITAL LETTER SLOVO + case 0x2C15 => 0x2C45 // GLAGOLITIC CAPITAL LETTER TVRIDO + case 0x2C16 => 0x2C46 // GLAGOLITIC CAPITAL LETTER UKU + case 0x2C17 => 0x2C47 // GLAGOLITIC CAPITAL LETTER FRITU + case 0x2C18 => 0x2C48 // GLAGOLITIC CAPITAL LETTER HERU + case 0x2C19 => 0x2C49 // GLAGOLITIC CAPITAL LETTER OTU + case 0x2C1A => 0x2C4A // GLAGOLITIC CAPITAL LETTER PE + case 0x2C1B => 0x2C4B // GLAGOLITIC CAPITAL LETTER SHTA + case 0x2C1C => 0x2C4C // GLAGOLITIC CAPITAL LETTER TSI + case 0x2C1D => 0x2C4D // GLAGOLITIC CAPITAL LETTER CHRIVI + case 0x2C1E => 0x2C4E // GLAGOLITIC CAPITAL LETTER SHA + case 0x2C1F => 0x2C4F // GLAGOLITIC CAPITAL LETTER YERU + case 0x2C20 => 0x2C50 // GLAGOLITIC CAPITAL LETTER YERI + case 0x2C21 => 0x2C51 // GLAGOLITIC CAPITAL LETTER YATI + case 0x2C22 => 0x2C52 // GLAGOLITIC CAPITAL LETTER SPIDERY HA + case 0x2C23 => 0x2C53 // GLAGOLITIC CAPITAL LETTER YU + case 0x2C24 => 0x2C54 // GLAGOLITIC CAPITAL LETTER SMALL YUS + case 0x2C25 => 
0x2C55 // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + case 0x2C26 => 0x2C56 // GLAGOLITIC CAPITAL LETTER YO + case 0x2C27 => 0x2C57 // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + case 0x2C28 => 0x2C58 // GLAGOLITIC CAPITAL LETTER BIG YUS + case 0x2C29 => 0x2C59 // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + case 0x2C2A => 0x2C5A // GLAGOLITIC CAPITAL LETTER FITA + case 0x2C2B => 0x2C5B // GLAGOLITIC CAPITAL LETTER IZHITSA + case 0x2C2C => 0x2C5C // GLAGOLITIC CAPITAL LETTER SHTAPIC + case 0x2C2D => 0x2C5D // GLAGOLITIC CAPITAL LETTER TROKUTASTI A + case 0x2C2E => 0x2C5E // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + case 0x2C2F => 0x2C5F // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI + case 0x2C60 => 0x2C61 // LATIN CAPITAL LETTER L WITH DOUBLE BAR + case 0x2C62 => 0x026B // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + case 0x2C63 => 0x1D7D // LATIN CAPITAL LETTER P WITH STROKE + case 0x2C64 => 0x027D // LATIN CAPITAL LETTER R WITH TAIL + case 0x2C67 => 0x2C68 // LATIN CAPITAL LETTER H WITH DESCENDER + case 0x2C69 => 0x2C6A // LATIN CAPITAL LETTER K WITH DESCENDER + case 0x2C6B => 0x2C6C // LATIN CAPITAL LETTER Z WITH DESCENDER + case 0x2C6D => 0x0251 // LATIN CAPITAL LETTER ALPHA + case 0x2C6E => 0x0271 // LATIN CAPITAL LETTER M WITH HOOK + case 0x2C6F => 0x0250 // LATIN CAPITAL LETTER TURNED A + case 0x2C70 => 0x0252 // LATIN CAPITAL LETTER TURNED ALPHA + case 0x2C72 => 0x2C73 // LATIN CAPITAL LETTER W WITH HOOK + case 0x2C75 => 0x2C76 // LATIN CAPITAL LETTER HALF H + case 0x2C7E => 0x023F // LATIN CAPITAL LETTER S WITH SWASH TAIL + case 0x2C7F => 0x0240 // LATIN CAPITAL LETTER Z WITH SWASH TAIL + case 0x2C80 => 0x2C81 // COPTIC CAPITAL LETTER ALFA + case 0x2C82 => 0x2C83 // COPTIC CAPITAL LETTER VIDA + case 0x2C84 => 0x2C85 // COPTIC CAPITAL LETTER GAMMA + case 0x2C86 => 0x2C87 // COPTIC CAPITAL LETTER DALDA + case 0x2C88 => 0x2C89 // COPTIC CAPITAL LETTER EIE + case 0x2C8A => 0x2C8B // COPTIC CAPITAL LETTER SOU + case 0x2C8C => 0x2C8D // COPTIC CAPITAL LETTER 
ZATA + case 0x2C8E => 0x2C8F // COPTIC CAPITAL LETTER HATE + case 0x2C90 => 0x2C91 // COPTIC CAPITAL LETTER THETHE + case 0x2C92 => 0x2C93 // COPTIC CAPITAL LETTER IAUDA + case 0x2C94 => 0x2C95 // COPTIC CAPITAL LETTER KAPA + case 0x2C96 => 0x2C97 // COPTIC CAPITAL LETTER LAULA + case 0x2C98 => 0x2C99 // COPTIC CAPITAL LETTER MI + case 0x2C9A => 0x2C9B // COPTIC CAPITAL LETTER NI + case 0x2C9C => 0x2C9D // COPTIC CAPITAL LETTER KSI + case 0x2C9E => 0x2C9F // COPTIC CAPITAL LETTER O + case 0x2CA0 => 0x2CA1 // COPTIC CAPITAL LETTER PI + case 0x2CA2 => 0x2CA3 // COPTIC CAPITAL LETTER RO + case 0x2CA4 => 0x2CA5 // COPTIC CAPITAL LETTER SIMA + case 0x2CA6 => 0x2CA7 // COPTIC CAPITAL LETTER TAU + case 0x2CA8 => 0x2CA9 // COPTIC CAPITAL LETTER UA + case 0x2CAA => 0x2CAB // COPTIC CAPITAL LETTER FI + case 0x2CAC => 0x2CAD // COPTIC CAPITAL LETTER KHI + case 0x2CAE => 0x2CAF // COPTIC CAPITAL LETTER PSI + case 0x2CB0 => 0x2CB1 // COPTIC CAPITAL LETTER OOU + case 0x2CB2 => 0x2CB3 // COPTIC CAPITAL LETTER DIALECT-P ALEF + case 0x2CB4 => 0x2CB5 // COPTIC CAPITAL LETTER OLD COPTIC AIN + case 0x2CB6 => 0x2CB7 // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + case 0x2CB8 => 0x2CB9 // COPTIC CAPITAL LETTER DIALECT-P KAPA + case 0x2CBA => 0x2CBB // COPTIC CAPITAL LETTER DIALECT-P NI + case 0x2CBC => 0x2CBD // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + case 0x2CBE => 0x2CBF // COPTIC CAPITAL LETTER OLD COPTIC OOU + case 0x2CC0 => 0x2CC1 // COPTIC CAPITAL LETTER SAMPI + case 0x2CC2 => 0x2CC3 // COPTIC CAPITAL LETTER CROSSED SHEI + case 0x2CC4 => 0x2CC5 // COPTIC CAPITAL LETTER OLD COPTIC SHEI + case 0x2CC6 => 0x2CC7 // COPTIC CAPITAL LETTER OLD COPTIC ESH + case 0x2CC8 => 0x2CC9 // COPTIC CAPITAL LETTER AKHMIMIC KHEI + case 0x2CCA => 0x2CCB // COPTIC CAPITAL LETTER DIALECT-P HORI + case 0x2CCC => 0x2CCD // COPTIC CAPITAL LETTER OLD COPTIC HORI + case 0x2CCE => 0x2CCF // COPTIC CAPITAL LETTER OLD COPTIC HA + case 0x2CD0 => 0x2CD1 // COPTIC CAPITAL LETTER L-SHAPED HA + case 0x2CD2 => 0x2CD3 
// COPTIC CAPITAL LETTER OLD COPTIC HEI + case 0x2CD4 => 0x2CD5 // COPTIC CAPITAL LETTER OLD COPTIC HAT + case 0x2CD6 => 0x2CD7 // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + case 0x2CD8 => 0x2CD9 // COPTIC CAPITAL LETTER OLD COPTIC DJA + case 0x2CDA => 0x2CDB // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + case 0x2CDC => 0x2CDD // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + case 0x2CDE => 0x2CDF // COPTIC CAPITAL LETTER OLD NUBIAN NGI + case 0x2CE0 => 0x2CE1 // COPTIC CAPITAL LETTER OLD NUBIAN NYI + case 0x2CE2 => 0x2CE3 // COPTIC CAPITAL LETTER OLD NUBIAN WAU + case 0x2CEB => 0x2CEC // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + case 0x2CED => 0x2CEE // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + case 0x2CF2 => 0x2CF3 // COPTIC CAPITAL LETTER BOHAIRIC KHEI + case 0xA640 => 0xA641 // CYRILLIC CAPITAL LETTER ZEMLYA + case 0xA642 => 0xA643 // CYRILLIC CAPITAL LETTER DZELO + case 0xA644 => 0xA645 // CYRILLIC CAPITAL LETTER REVERSED DZE + case 0xA646 => 0xA647 // CYRILLIC CAPITAL LETTER IOTA + case 0xA648 => 0xA649 // CYRILLIC CAPITAL LETTER DJERV + case 0xA64A => 0xA64B // CYRILLIC CAPITAL LETTER MONOGRAPH UK + case 0xA64C => 0xA64D // CYRILLIC CAPITAL LETTER BROAD OMEGA + case 0xA64E => 0xA64F // CYRILLIC CAPITAL LETTER NEUTRAL YER + case 0xA650 => 0xA651 // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + case 0xA652 => 0xA653 // CYRILLIC CAPITAL LETTER IOTIFIED YAT + case 0xA654 => 0xA655 // CYRILLIC CAPITAL LETTER REVERSED YU + case 0xA656 => 0xA657 // CYRILLIC CAPITAL LETTER IOTIFIED A + case 0xA658 => 0xA659 // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + case 0xA65A => 0xA65B // CYRILLIC CAPITAL LETTER BLENDED YUS + case 0xA65C => 0xA65D // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + case 0xA65E => 0xA65F // CYRILLIC CAPITAL LETTER YN + case 0xA660 => 0xA661 // CYRILLIC CAPITAL LETTER REVERSED TSE + case 0xA662 => 0xA663 // CYRILLIC CAPITAL LETTER SOFT DE + case 0xA664 => 0xA665 // CYRILLIC CAPITAL LETTER SOFT EL + case 0xA666 => 0xA667 // CYRILLIC CAPITAL LETTER 
SOFT EM + case 0xA668 => 0xA669 // CYRILLIC CAPITAL LETTER MONOCULAR O + case 0xA66A => 0xA66B // CYRILLIC CAPITAL LETTER BINOCULAR O + case 0xA66C => 0xA66D // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + case 0xA680 => 0xA681 // CYRILLIC CAPITAL LETTER DWE + case 0xA682 => 0xA683 // CYRILLIC CAPITAL LETTER DZWE + case 0xA684 => 0xA685 // CYRILLIC CAPITAL LETTER ZHWE + case 0xA686 => 0xA687 // CYRILLIC CAPITAL LETTER CCHE + case 0xA688 => 0xA689 // CYRILLIC CAPITAL LETTER DZZE + case 0xA68A => 0xA68B // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + case 0xA68C => 0xA68D // CYRILLIC CAPITAL LETTER TWE + case 0xA68E => 0xA68F // CYRILLIC CAPITAL LETTER TSWE + case 0xA690 => 0xA691 // CYRILLIC CAPITAL LETTER TSSE + case 0xA692 => 0xA693 // CYRILLIC CAPITAL LETTER TCHE + case 0xA694 => 0xA695 // CYRILLIC CAPITAL LETTER HWE + case 0xA696 => 0xA697 // CYRILLIC CAPITAL LETTER SHWE + case 0xA698 => 0xA699 // CYRILLIC CAPITAL LETTER DOUBLE O + case 0xA69A => 0xA69B // CYRILLIC CAPITAL LETTER CROSSED O + case 0xA722 => 0xA723 // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + case 0xA724 => 0xA725 // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + case 0xA726 => 0xA727 // LATIN CAPITAL LETTER HENG + case 0xA728 => 0xA729 // LATIN CAPITAL LETTER TZ + case 0xA72A => 0xA72B // LATIN CAPITAL LETTER TRESILLO + case 0xA72C => 0xA72D // LATIN CAPITAL LETTER CUATRILLO + case 0xA72E => 0xA72F // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + case 0xA732 => 0xA733 // LATIN CAPITAL LETTER AA + case 0xA734 => 0xA735 // LATIN CAPITAL LETTER AO + case 0xA736 => 0xA737 // LATIN CAPITAL LETTER AU + case 0xA738 => 0xA739 // LATIN CAPITAL LETTER AV + case 0xA73A => 0xA73B // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + case 0xA73C => 0xA73D // LATIN CAPITAL LETTER AY + case 0xA73E => 0xA73F // LATIN CAPITAL LETTER REVERSED C WITH DOT + case 0xA740 => 0xA741 // LATIN CAPITAL LETTER K WITH STROKE + case 0xA742 => 0xA743 // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + case 0xA744 => 0xA745 // LATIN CAPITAL 
LETTER K WITH STROKE AND DIAGONAL STROKE + case 0xA746 => 0xA747 // LATIN CAPITAL LETTER BROKEN L + case 0xA748 => 0xA749 // LATIN CAPITAL LETTER L WITH HIGH STROKE + case 0xA74A => 0xA74B // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + case 0xA74C => 0xA74D // LATIN CAPITAL LETTER O WITH LOOP + case 0xA74E => 0xA74F // LATIN CAPITAL LETTER OO + case 0xA750 => 0xA751 // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + case 0xA752 => 0xA753 // LATIN CAPITAL LETTER P WITH FLOURISH + case 0xA754 => 0xA755 // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + case 0xA756 => 0xA757 // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + case 0xA758 => 0xA759 // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + case 0xA75A => 0xA75B // LATIN CAPITAL LETTER R ROTUNDA + case 0xA75C => 0xA75D // LATIN CAPITAL LETTER RUM ROTUNDA + case 0xA75E => 0xA75F // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + case 0xA760 => 0xA761 // LATIN CAPITAL LETTER VY + case 0xA762 => 0xA763 // LATIN CAPITAL LETTER VISIGOTHIC Z + case 0xA764 => 0xA765 // LATIN CAPITAL LETTER THORN WITH STROKE + case 0xA766 => 0xA767 // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + case 0xA768 => 0xA769 // LATIN CAPITAL LETTER VEND + case 0xA76A => 0xA76B // LATIN CAPITAL LETTER ET + case 0xA76C => 0xA76D // LATIN CAPITAL LETTER IS + case 0xA76E => 0xA76F // LATIN CAPITAL LETTER CON + case 0xA779 => 0xA77A // LATIN CAPITAL LETTER INSULAR D + case 0xA77B => 0xA77C // LATIN CAPITAL LETTER INSULAR F + case 0xA77D => 0x1D79 // LATIN CAPITAL LETTER INSULAR G + case 0xA77E => 0xA77F // LATIN CAPITAL LETTER TURNED INSULAR G + case 0xA780 => 0xA781 // LATIN CAPITAL LETTER TURNED L + case 0xA782 => 0xA783 // LATIN CAPITAL LETTER INSULAR R + case 0xA784 => 0xA785 // LATIN CAPITAL LETTER INSULAR S + case 0xA786 => 0xA787 // LATIN CAPITAL LETTER INSULAR T + case 0xA78B => 0xA78C // LATIN CAPITAL LETTER SALTILLO + case 0xA78D => 0x0265 // LATIN CAPITAL LETTER TURNED H + case 0xA790 => 0xA791 // LATIN CAPITAL 
LETTER N WITH DESCENDER + case 0xA792 => 0xA793 // LATIN CAPITAL LETTER C WITH BAR + case 0xA796 => 0xA797 // LATIN CAPITAL LETTER B WITH FLOURISH + case 0xA798 => 0xA799 // LATIN CAPITAL LETTER F WITH STROKE + case 0xA79A => 0xA79B // LATIN CAPITAL LETTER VOLAPUK AE + case 0xA79C => 0xA79D // LATIN CAPITAL LETTER VOLAPUK OE + case 0xA79E => 0xA79F // LATIN CAPITAL LETTER VOLAPUK UE + case 0xA7A0 => 0xA7A1 // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE + case 0xA7A2 => 0xA7A3 // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE + case 0xA7A4 => 0xA7A5 // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE + case 0xA7A6 => 0xA7A7 // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE + case 0xA7A8 => 0xA7A9 // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE + case 0xA7AA => 0x0266 // LATIN CAPITAL LETTER H WITH HOOK + case 0xA7AB => 0x025C // LATIN CAPITAL LETTER REVERSED OPEN E + case 0xA7AC => 0x0261 // LATIN CAPITAL LETTER SCRIPT G + case 0xA7AD => 0x026C // LATIN CAPITAL LETTER L WITH BELT + case 0xA7AE => 0x026A // LATIN CAPITAL LETTER SMALL CAPITAL I + case 0xA7B0 => 0x029E // LATIN CAPITAL LETTER TURNED K + case 0xA7B1 => 0x0287 // LATIN CAPITAL LETTER TURNED T + case 0xA7B2 => 0x029D // LATIN CAPITAL LETTER J WITH CROSSED-TAIL + case 0xA7B3 => 0xAB53 // LATIN CAPITAL LETTER CHI + case 0xA7B4 => 0xA7B5 // LATIN CAPITAL LETTER BETA + case 0xA7B6 => 0xA7B7 // LATIN CAPITAL LETTER OMEGA + case 0xA7B8 => 0xA7B9 // LATIN CAPITAL LETTER U WITH STROKE + case 0xA7BA => 0xA7BB // LATIN CAPITAL LETTER GLOTTAL A + case 0xA7BC => 0xA7BD // LATIN CAPITAL LETTER GLOTTAL I + case 0xA7BE => 0xA7BF // LATIN CAPITAL LETTER GLOTTAL U + case 0xA7C0 => 0xA7C1 // LATIN CAPITAL LETTER OLD POLISH O + case 0xA7C2 => 0xA7C3 // LATIN CAPITAL LETTER ANGLICANA W + case 0xA7C4 => 0xA794 // LATIN CAPITAL LETTER C WITH PALATAL HOOK + case 0xA7C5 => 0x0282 // LATIN CAPITAL LETTER S WITH HOOK + case 0xA7C6 => 0x1D8E // LATIN CAPITAL LETTER Z WITH PALATAL HOOK + case 0xA7C7 => 0xA7C8 // LATIN CAPITAL LETTER D WITH SHORT 
STROKE OVERLAY + case 0xA7C9 => 0xA7CA // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY + case 0xA7D0 => 0xA7D1 // LATIN CAPITAL LETTER CLOSED INSULAR G + case 0xA7D6 => 0xA7D7 // LATIN CAPITAL LETTER MIDDLE SCOTS S + case 0xA7D8 => 0xA7D9 // LATIN CAPITAL LETTER SIGMOID S + case 0xA7F5 => 0xA7F6 // LATIN CAPITAL LETTER REVERSED HALF H + case 0xAB70 => 0x13A0 // CHEROKEE SMALL LETTER A + case 0xAB71 => 0x13A1 // CHEROKEE SMALL LETTER E + case 0xAB72 => 0x13A2 // CHEROKEE SMALL LETTER I + case 0xAB73 => 0x13A3 // CHEROKEE SMALL LETTER O + case 0xAB74 => 0x13A4 // CHEROKEE SMALL LETTER U + case 0xAB75 => 0x13A5 // CHEROKEE SMALL LETTER V + case 0xAB76 => 0x13A6 // CHEROKEE SMALL LETTER GA + case 0xAB77 => 0x13A7 // CHEROKEE SMALL LETTER KA + case 0xAB78 => 0x13A8 // CHEROKEE SMALL LETTER GE + case 0xAB79 => 0x13A9 // CHEROKEE SMALL LETTER GI + case 0xAB7A => 0x13AA // CHEROKEE SMALL LETTER GO + case 0xAB7B => 0x13AB // CHEROKEE SMALL LETTER GU + case 0xAB7C => 0x13AC // CHEROKEE SMALL LETTER GV + case 0xAB7D => 0x13AD // CHEROKEE SMALL LETTER HA + case 0xAB7E => 0x13AE // CHEROKEE SMALL LETTER HE + case 0xAB7F => 0x13AF // CHEROKEE SMALL LETTER HI + case 0xAB80 => 0x13B0 // CHEROKEE SMALL LETTER HO + case 0xAB81 => 0x13B1 // CHEROKEE SMALL LETTER HU + case 0xAB82 => 0x13B2 // CHEROKEE SMALL LETTER HV + case 0xAB83 => 0x13B3 // CHEROKEE SMALL LETTER LA + case 0xAB84 => 0x13B4 // CHEROKEE SMALL LETTER LE + case 0xAB85 => 0x13B5 // CHEROKEE SMALL LETTER LI + case 0xAB86 => 0x13B6 // CHEROKEE SMALL LETTER LO + case 0xAB87 => 0x13B7 // CHEROKEE SMALL LETTER LU + case 0xAB88 => 0x13B8 // CHEROKEE SMALL LETTER LV + case 0xAB89 => 0x13B9 // CHEROKEE SMALL LETTER MA + case 0xAB8A => 0x13BA // CHEROKEE SMALL LETTER ME + case 0xAB8B => 0x13BB // CHEROKEE SMALL LETTER MI + case 0xAB8C => 0x13BC // CHEROKEE SMALL LETTER MO + case 0xAB8D => 0x13BD // CHEROKEE SMALL LETTER MU + case 0xAB8E => 0x13BE // CHEROKEE SMALL LETTER NA + case 0xAB8F => 0x13BF // CHEROKEE SMALL LETTER HNA 
+ case 0xAB90 => 0x13C0 // CHEROKEE SMALL LETTER NAH + case 0xAB91 => 0x13C1 // CHEROKEE SMALL LETTER NE + case 0xAB92 => 0x13C2 // CHEROKEE SMALL LETTER NI + case 0xAB93 => 0x13C3 // CHEROKEE SMALL LETTER NO + case 0xAB94 => 0x13C4 // CHEROKEE SMALL LETTER NU + case 0xAB95 => 0x13C5 // CHEROKEE SMALL LETTER NV + case 0xAB96 => 0x13C6 // CHEROKEE SMALL LETTER QUA + case 0xAB97 => 0x13C7 // CHEROKEE SMALL LETTER QUE + case 0xAB98 => 0x13C8 // CHEROKEE SMALL LETTER QUI + case 0xAB99 => 0x13C9 // CHEROKEE SMALL LETTER QUO + case 0xAB9A => 0x13CA // CHEROKEE SMALL LETTER QUU + case 0xAB9B => 0x13CB // CHEROKEE SMALL LETTER QUV + case 0xAB9C => 0x13CC // CHEROKEE SMALL LETTER SA + case 0xAB9D => 0x13CD // CHEROKEE SMALL LETTER S + case 0xAB9E => 0x13CE // CHEROKEE SMALL LETTER SE + case 0xAB9F => 0x13CF // CHEROKEE SMALL LETTER SI + case 0xABA0 => 0x13D0 // CHEROKEE SMALL LETTER SO + case 0xABA1 => 0x13D1 // CHEROKEE SMALL LETTER SU + case 0xABA2 => 0x13D2 // CHEROKEE SMALL LETTER SV + case 0xABA3 => 0x13D3 // CHEROKEE SMALL LETTER DA + case 0xABA4 => 0x13D4 // CHEROKEE SMALL LETTER TA + case 0xABA5 => 0x13D5 // CHEROKEE SMALL LETTER DE + case 0xABA6 => 0x13D6 // CHEROKEE SMALL LETTER TE + case 0xABA7 => 0x13D7 // CHEROKEE SMALL LETTER DI + case 0xABA8 => 0x13D8 // CHEROKEE SMALL LETTER TI + case 0xABA9 => 0x13D9 // CHEROKEE SMALL LETTER DO + case 0xABAA => 0x13DA // CHEROKEE SMALL LETTER DU + case 0xABAB => 0x13DB // CHEROKEE SMALL LETTER DV + case 0xABAC => 0x13DC // CHEROKEE SMALL LETTER DLA + case 0xABAD => 0x13DD // CHEROKEE SMALL LETTER TLA + case 0xABAE => 0x13DE // CHEROKEE SMALL LETTER TLE + case 0xABAF => 0x13DF // CHEROKEE SMALL LETTER TLI + case 0xABB0 => 0x13E0 // CHEROKEE SMALL LETTER TLO + case 0xABB1 => 0x13E1 // CHEROKEE SMALL LETTER TLU + case 0xABB2 => 0x13E2 // CHEROKEE SMALL LETTER TLV + case 0xABB3 => 0x13E3 // CHEROKEE SMALL LETTER TSA + case 0xABB4 => 0x13E4 // CHEROKEE SMALL LETTER TSE + case 0xABB5 => 0x13E5 // CHEROKEE SMALL LETTER TSI + case 
0xABB6 => 0x13E6 // CHEROKEE SMALL LETTER TSO + case 0xABB7 => 0x13E7 // CHEROKEE SMALL LETTER TSU + case 0xABB8 => 0x13E8 // CHEROKEE SMALL LETTER TSV + case 0xABB9 => 0x13E9 // CHEROKEE SMALL LETTER WA + case 0xABBA => 0x13EA // CHEROKEE SMALL LETTER WE + case 0xABBB => 0x13EB // CHEROKEE SMALL LETTER WI + case 0xABBC => 0x13EC // CHEROKEE SMALL LETTER WO + case 0xABBD => 0x13ED // CHEROKEE SMALL LETTER WU + case 0xABBE => 0x13EE // CHEROKEE SMALL LETTER WV + case 0xABBF => 0x13EF // CHEROKEE SMALL LETTER YA + case 0xFF21 => 0xFF41 // FULLWIDTH LATIN CAPITAL LETTER A + case 0xFF22 => 0xFF42 // FULLWIDTH LATIN CAPITAL LETTER B + case 0xFF23 => 0xFF43 // FULLWIDTH LATIN CAPITAL LETTER C + case 0xFF24 => 0xFF44 // FULLWIDTH LATIN CAPITAL LETTER D + case 0xFF25 => 0xFF45 // FULLWIDTH LATIN CAPITAL LETTER E + case 0xFF26 => 0xFF46 // FULLWIDTH LATIN CAPITAL LETTER F + case 0xFF27 => 0xFF47 // FULLWIDTH LATIN CAPITAL LETTER G + case 0xFF28 => 0xFF48 // FULLWIDTH LATIN CAPITAL LETTER H + case 0xFF29 => 0xFF49 // FULLWIDTH LATIN CAPITAL LETTER I + case 0xFF2A => 0xFF4A // FULLWIDTH LATIN CAPITAL LETTER J + case 0xFF2B => 0xFF4B // FULLWIDTH LATIN CAPITAL LETTER K + case 0xFF2C => 0xFF4C // FULLWIDTH LATIN CAPITAL LETTER L + case 0xFF2D => 0xFF4D // FULLWIDTH LATIN CAPITAL LETTER M + case 0xFF2E => 0xFF4E // FULLWIDTH LATIN CAPITAL LETTER N + case 0xFF2F => 0xFF4F // FULLWIDTH LATIN CAPITAL LETTER O + case 0xFF30 => 0xFF50 // FULLWIDTH LATIN CAPITAL LETTER P + case 0xFF31 => 0xFF51 // FULLWIDTH LATIN CAPITAL LETTER Q + case 0xFF32 => 0xFF52 // FULLWIDTH LATIN CAPITAL LETTER R + case 0xFF33 => 0xFF53 // FULLWIDTH LATIN CAPITAL LETTER S + case 0xFF34 => 0xFF54 // FULLWIDTH LATIN CAPITAL LETTER T + case 0xFF35 => 0xFF55 // FULLWIDTH LATIN CAPITAL LETTER U + case 0xFF36 => 0xFF56 // FULLWIDTH LATIN CAPITAL LETTER V + case 0xFF37 => 0xFF57 // FULLWIDTH LATIN CAPITAL LETTER W + case 0xFF38 => 0xFF58 // FULLWIDTH LATIN CAPITAL LETTER X + case 0xFF39 => 0xFF59 // FULLWIDTH LATIN 
CAPITAL LETTER Y + case 0xFF3A => 0xFF5A // FULLWIDTH LATIN CAPITAL LETTER Z + case 0x10400 => 0x10428 // DESERET CAPITAL LETTER LONG I + case 0x10401 => 0x10429 // DESERET CAPITAL LETTER LONG E + case 0x10402 => 0x1042A // DESERET CAPITAL LETTER LONG A + case 0x10403 => 0x1042B // DESERET CAPITAL LETTER LONG AH + case 0x10404 => 0x1042C // DESERET CAPITAL LETTER LONG O + case 0x10405 => 0x1042D // DESERET CAPITAL LETTER LONG OO + case 0x10406 => 0x1042E // DESERET CAPITAL LETTER SHORT I + case 0x10407 => 0x1042F // DESERET CAPITAL LETTER SHORT E + case 0x10408 => 0x10430 // DESERET CAPITAL LETTER SHORT A + case 0x10409 => 0x10431 // DESERET CAPITAL LETTER SHORT AH + case 0x1040A => 0x10432 // DESERET CAPITAL LETTER SHORT O + case 0x1040B => 0x10433 // DESERET CAPITAL LETTER SHORT OO + case 0x1040C => 0x10434 // DESERET CAPITAL LETTER AY + case 0x1040D => 0x10435 // DESERET CAPITAL LETTER OW + case 0x1040E => 0x10436 // DESERET CAPITAL LETTER WU + case 0x1040F => 0x10437 // DESERET CAPITAL LETTER YEE + case 0x10410 => 0x10438 // DESERET CAPITAL LETTER H + case 0x10411 => 0x10439 // DESERET CAPITAL LETTER PEE + case 0x10412 => 0x1043A // DESERET CAPITAL LETTER BEE + case 0x10413 => 0x1043B // DESERET CAPITAL LETTER TEE + case 0x10414 => 0x1043C // DESERET CAPITAL LETTER DEE + case 0x10415 => 0x1043D // DESERET CAPITAL LETTER CHEE + case 0x10416 => 0x1043E // DESERET CAPITAL LETTER JEE + case 0x10417 => 0x1043F // DESERET CAPITAL LETTER KAY + case 0x10418 => 0x10440 // DESERET CAPITAL LETTER GAY + case 0x10419 => 0x10441 // DESERET CAPITAL LETTER EF + case 0x1041A => 0x10442 // DESERET CAPITAL LETTER VEE + case 0x1041B => 0x10443 // DESERET CAPITAL LETTER ETH + case 0x1041C => 0x10444 // DESERET CAPITAL LETTER THEE + case 0x1041D => 0x10445 // DESERET CAPITAL LETTER ES + case 0x1041E => 0x10446 // DESERET CAPITAL LETTER ZEE + case 0x1041F => 0x10447 // DESERET CAPITAL LETTER ESH + case 0x10420 => 0x10448 // DESERET CAPITAL LETTER ZHEE + case 0x10421 => 0x10449 // 
DESERET CAPITAL LETTER ER + case 0x10422 => 0x1044A // DESERET CAPITAL LETTER EL + case 0x10423 => 0x1044B // DESERET CAPITAL LETTER EM + case 0x10424 => 0x1044C // DESERET CAPITAL LETTER EN + case 0x10425 => 0x1044D // DESERET CAPITAL LETTER ENG + case 0x10426 => 0x1044E // DESERET CAPITAL LETTER OI + case 0x10427 => 0x1044F // DESERET CAPITAL LETTER EW + case 0x104B0 => 0x104D8 // OSAGE CAPITAL LETTER A + case 0x104B1 => 0x104D9 // OSAGE CAPITAL LETTER AI + case 0x104B2 => 0x104DA // OSAGE CAPITAL LETTER AIN + case 0x104B3 => 0x104DB // OSAGE CAPITAL LETTER AH + case 0x104B4 => 0x104DC // OSAGE CAPITAL LETTER BRA + case 0x104B5 => 0x104DD // OSAGE CAPITAL LETTER CHA + case 0x104B6 => 0x104DE // OSAGE CAPITAL LETTER EHCHA + case 0x104B7 => 0x104DF // OSAGE CAPITAL LETTER E + case 0x104B8 => 0x104E0 // OSAGE CAPITAL LETTER EIN + case 0x104B9 => 0x104E1 // OSAGE CAPITAL LETTER HA + case 0x104BA => 0x104E2 // OSAGE CAPITAL LETTER HYA + case 0x104BB => 0x104E3 // OSAGE CAPITAL LETTER I + case 0x104BC => 0x104E4 // OSAGE CAPITAL LETTER KA + case 0x104BD => 0x104E5 // OSAGE CAPITAL LETTER EHKA + case 0x104BE => 0x104E6 // OSAGE CAPITAL LETTER KYA + case 0x104BF => 0x104E7 // OSAGE CAPITAL LETTER LA + case 0x104C0 => 0x104E8 // OSAGE CAPITAL LETTER MA + case 0x104C1 => 0x104E9 // OSAGE CAPITAL LETTER NA + case 0x104C2 => 0x104EA // OSAGE CAPITAL LETTER O + case 0x104C3 => 0x104EB // OSAGE CAPITAL LETTER OIN + case 0x104C4 => 0x104EC // OSAGE CAPITAL LETTER PA + case 0x104C5 => 0x104ED // OSAGE CAPITAL LETTER EHPA + case 0x104C6 => 0x104EE // OSAGE CAPITAL LETTER SA + case 0x104C7 => 0x104EF // OSAGE CAPITAL LETTER SHA + case 0x104C8 => 0x104F0 // OSAGE CAPITAL LETTER TA + case 0x104C9 => 0x104F1 // OSAGE CAPITAL LETTER EHTA + case 0x104CA => 0x104F2 // OSAGE CAPITAL LETTER TSA + case 0x104CB => 0x104F3 // OSAGE CAPITAL LETTER EHTSA + case 0x104CC => 0x104F4 // OSAGE CAPITAL LETTER TSHA + case 0x104CD => 0x104F5 // OSAGE CAPITAL LETTER DHA + case 0x104CE => 0x104F6 // 
OSAGE CAPITAL LETTER U + case 0x104CF => 0x104F7 // OSAGE CAPITAL LETTER WA + case 0x104D0 => 0x104F8 // OSAGE CAPITAL LETTER KHA + case 0x104D1 => 0x104F9 // OSAGE CAPITAL LETTER GHA + case 0x104D2 => 0x104FA // OSAGE CAPITAL LETTER ZA + case 0x104D3 => 0x104FB // OSAGE CAPITAL LETTER ZHA + case 0x10570 => 0x10597 // VITHKUQI CAPITAL LETTER A + case 0x10571 => 0x10598 // VITHKUQI CAPITAL LETTER BBE + case 0x10572 => 0x10599 // VITHKUQI CAPITAL LETTER BE + case 0x10573 => 0x1059A // VITHKUQI CAPITAL LETTER CE + case 0x10574 => 0x1059B // VITHKUQI CAPITAL LETTER CHE + case 0x10575 => 0x1059C // VITHKUQI CAPITAL LETTER DE + case 0x10576 => 0x1059D // VITHKUQI CAPITAL LETTER DHE + case 0x10577 => 0x1059E // VITHKUQI CAPITAL LETTER EI + case 0x10578 => 0x1059F // VITHKUQI CAPITAL LETTER E + case 0x10579 => 0x105A0 // VITHKUQI CAPITAL LETTER FE + case 0x1057A => 0x105A1 // VITHKUQI CAPITAL LETTER GA + case 0x1057C => 0x105A3 // VITHKUQI CAPITAL LETTER HA + case 0x1057D => 0x105A4 // VITHKUQI CAPITAL LETTER HHA + case 0x1057E => 0x105A5 // VITHKUQI CAPITAL LETTER I + case 0x1057F => 0x105A6 // VITHKUQI CAPITAL LETTER IJE + case 0x10580 => 0x105A7 // VITHKUQI CAPITAL LETTER JE + case 0x10581 => 0x105A8 // VITHKUQI CAPITAL LETTER KA + case 0x10582 => 0x105A9 // VITHKUQI CAPITAL LETTER LA + case 0x10583 => 0x105AA // VITHKUQI CAPITAL LETTER LLA + case 0x10584 => 0x105AB // VITHKUQI CAPITAL LETTER ME + case 0x10585 => 0x105AC // VITHKUQI CAPITAL LETTER NE + case 0x10586 => 0x105AD // VITHKUQI CAPITAL LETTER NJE + case 0x10587 => 0x105AE // VITHKUQI CAPITAL LETTER O + case 0x10588 => 0x105AF // VITHKUQI CAPITAL LETTER PE + case 0x10589 => 0x105B0 // VITHKUQI CAPITAL LETTER QA + case 0x1058A => 0x105B1 // VITHKUQI CAPITAL LETTER RE + case 0x1058C => 0x105B3 // VITHKUQI CAPITAL LETTER SE + case 0x1058D => 0x105B4 // VITHKUQI CAPITAL LETTER SHE + case 0x1058E => 0x105B5 // VITHKUQI CAPITAL LETTER TE + case 0x1058F => 0x105B6 // VITHKUQI CAPITAL LETTER THE + case 0x10590 => 
0x105B7 // VITHKUQI CAPITAL LETTER U + case 0x10591 => 0x105B8 // VITHKUQI CAPITAL LETTER VE + case 0x10592 => 0x105B9 // VITHKUQI CAPITAL LETTER XE + case 0x10594 => 0x105BB // VITHKUQI CAPITAL LETTER Y + case 0x10595 => 0x105BC // VITHKUQI CAPITAL LETTER ZE + case 0x10C80 => 0x10CC0 // OLD HUNGARIAN CAPITAL LETTER A + case 0x10C81 => 0x10CC1 // OLD HUNGARIAN CAPITAL LETTER AA + case 0x10C82 => 0x10CC2 // OLD HUNGARIAN CAPITAL LETTER EB + case 0x10C83 => 0x10CC3 // OLD HUNGARIAN CAPITAL LETTER AMB + case 0x10C84 => 0x10CC4 // OLD HUNGARIAN CAPITAL LETTER EC + case 0x10C85 => 0x10CC5 // OLD HUNGARIAN CAPITAL LETTER ENC + case 0x10C86 => 0x10CC6 // OLD HUNGARIAN CAPITAL LETTER ECS + case 0x10C87 => 0x10CC7 // OLD HUNGARIAN CAPITAL LETTER ED + case 0x10C88 => 0x10CC8 // OLD HUNGARIAN CAPITAL LETTER AND + case 0x10C89 => 0x10CC9 // OLD HUNGARIAN CAPITAL LETTER E + case 0x10C8A => 0x10CCA // OLD HUNGARIAN CAPITAL LETTER CLOSE E + case 0x10C8B => 0x10CCB // OLD HUNGARIAN CAPITAL LETTER EE + case 0x10C8C => 0x10CCC // OLD HUNGARIAN CAPITAL LETTER EF + case 0x10C8D => 0x10CCD // OLD HUNGARIAN CAPITAL LETTER EG + case 0x10C8E => 0x10CCE // OLD HUNGARIAN CAPITAL LETTER EGY + case 0x10C8F => 0x10CCF // OLD HUNGARIAN CAPITAL LETTER EH + case 0x10C90 => 0x10CD0 // OLD HUNGARIAN CAPITAL LETTER I + case 0x10C91 => 0x10CD1 // OLD HUNGARIAN CAPITAL LETTER II + case 0x10C92 => 0x10CD2 // OLD HUNGARIAN CAPITAL LETTER EJ + case 0x10C93 => 0x10CD3 // OLD HUNGARIAN CAPITAL LETTER EK + case 0x10C94 => 0x10CD4 // OLD HUNGARIAN CAPITAL LETTER AK + case 0x10C95 => 0x10CD5 // OLD HUNGARIAN CAPITAL LETTER UNK + case 0x10C96 => 0x10CD6 // OLD HUNGARIAN CAPITAL LETTER EL + case 0x10C97 => 0x10CD7 // OLD HUNGARIAN CAPITAL LETTER ELY + case 0x10C98 => 0x10CD8 // OLD HUNGARIAN CAPITAL LETTER EM + case 0x10C99 => 0x10CD9 // OLD HUNGARIAN CAPITAL LETTER EN + case 0x10C9A => 0x10CDA // OLD HUNGARIAN CAPITAL LETTER ENY + case 0x10C9B => 0x10CDB // OLD HUNGARIAN CAPITAL LETTER O + case 0x10C9C => 
0x10CDC // OLD HUNGARIAN CAPITAL LETTER OO + case 0x10C9D => 0x10CDD // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE + case 0x10C9E => 0x10CDE // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE + case 0x10C9F => 0x10CDF // OLD HUNGARIAN CAPITAL LETTER OEE + case 0x10CA0 => 0x10CE0 // OLD HUNGARIAN CAPITAL LETTER EP + case 0x10CA1 => 0x10CE1 // OLD HUNGARIAN CAPITAL LETTER EMP + case 0x10CA2 => 0x10CE2 // OLD HUNGARIAN CAPITAL LETTER ER + case 0x10CA3 => 0x10CE3 // OLD HUNGARIAN CAPITAL LETTER SHORT ER + case 0x10CA4 => 0x10CE4 // OLD HUNGARIAN CAPITAL LETTER ES + case 0x10CA5 => 0x10CE5 // OLD HUNGARIAN CAPITAL LETTER ESZ + case 0x10CA6 => 0x10CE6 // OLD HUNGARIAN CAPITAL LETTER ET + case 0x10CA7 => 0x10CE7 // OLD HUNGARIAN CAPITAL LETTER ENT + case 0x10CA8 => 0x10CE8 // OLD HUNGARIAN CAPITAL LETTER ETY + case 0x10CA9 => 0x10CE9 // OLD HUNGARIAN CAPITAL LETTER ECH + case 0x10CAA => 0x10CEA // OLD HUNGARIAN CAPITAL LETTER U + case 0x10CAB => 0x10CEB // OLD HUNGARIAN CAPITAL LETTER UU + case 0x10CAC => 0x10CEC // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE + case 0x10CAD => 0x10CED // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE + case 0x10CAE => 0x10CEE // OLD HUNGARIAN CAPITAL LETTER EV + case 0x10CAF => 0x10CEF // OLD HUNGARIAN CAPITAL LETTER EZ + case 0x10CB0 => 0x10CF0 // OLD HUNGARIAN CAPITAL LETTER EZS + case 0x10CB1 => 0x10CF1 // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN + case 0x10CB2 => 0x10CF2 // OLD HUNGARIAN CAPITAL LETTER US + case 0x118A0 => 0x118C0 // WARANG CITI CAPITAL LETTER NGAA + case 0x118A1 => 0x118C1 // WARANG CITI CAPITAL LETTER A + case 0x118A2 => 0x118C2 // WARANG CITI CAPITAL LETTER WI + case 0x118A3 => 0x118C3 // WARANG CITI CAPITAL LETTER YU + case 0x118A4 => 0x118C4 // WARANG CITI CAPITAL LETTER YA + case 0x118A5 => 0x118C5 // WARANG CITI CAPITAL LETTER YO + case 0x118A6 => 0x118C6 // WARANG CITI CAPITAL LETTER II + case 0x118A7 => 0x118C7 // WARANG CITI CAPITAL LETTER UU + case 0x118A8 => 0x118C8 // WARANG CITI CAPITAL LETTER E + case 0x118A9 
=> 0x118C9 // WARANG CITI CAPITAL LETTER O + case 0x118AA => 0x118CA // WARANG CITI CAPITAL LETTER ANG + case 0x118AB => 0x118CB // WARANG CITI CAPITAL LETTER GA + case 0x118AC => 0x118CC // WARANG CITI CAPITAL LETTER KO + case 0x118AD => 0x118CD // WARANG CITI CAPITAL LETTER ENY + case 0x118AE => 0x118CE // WARANG CITI CAPITAL LETTER YUJ + case 0x118AF => 0x118CF // WARANG CITI CAPITAL LETTER UC + case 0x118B0 => 0x118D0 // WARANG CITI CAPITAL LETTER ENN + case 0x118B1 => 0x118D1 // WARANG CITI CAPITAL LETTER ODD + case 0x118B2 => 0x118D2 // WARANG CITI CAPITAL LETTER TTE + case 0x118B3 => 0x118D3 // WARANG CITI CAPITAL LETTER NUNG + case 0x118B4 => 0x118D4 // WARANG CITI CAPITAL LETTER DA + case 0x118B5 => 0x118D5 // WARANG CITI CAPITAL LETTER AT + case 0x118B6 => 0x118D6 // WARANG CITI CAPITAL LETTER AM + case 0x118B7 => 0x118D7 // WARANG CITI CAPITAL LETTER BU + case 0x118B8 => 0x118D8 // WARANG CITI CAPITAL LETTER PU + case 0x118B9 => 0x118D9 // WARANG CITI CAPITAL LETTER HIYO + case 0x118BA => 0x118DA // WARANG CITI CAPITAL LETTER HOLO + case 0x118BB => 0x118DB // WARANG CITI CAPITAL LETTER HORR + case 0x118BC => 0x118DC // WARANG CITI CAPITAL LETTER HAR + case 0x118BD => 0x118DD // WARANG CITI CAPITAL LETTER SSUU + case 0x118BE => 0x118DE // WARANG CITI CAPITAL LETTER SII + case 0x118BF => 0x118DF // WARANG CITI CAPITAL LETTER VIYO + case 0x16E40 => 0x16E60 // MEDEFAIDRIN CAPITAL LETTER M + case 0x16E41 => 0x16E61 // MEDEFAIDRIN CAPITAL LETTER S + case 0x16E42 => 0x16E62 // MEDEFAIDRIN CAPITAL LETTER V + case 0x16E43 => 0x16E63 // MEDEFAIDRIN CAPITAL LETTER W + case 0x16E44 => 0x16E64 // MEDEFAIDRIN CAPITAL LETTER ATIU + case 0x16E45 => 0x16E65 // MEDEFAIDRIN CAPITAL LETTER Z + case 0x16E46 => 0x16E66 // MEDEFAIDRIN CAPITAL LETTER KP + case 0x16E47 => 0x16E67 // MEDEFAIDRIN CAPITAL LETTER P + case 0x16E48 => 0x16E68 // MEDEFAIDRIN CAPITAL LETTER T + case 0x16E49 => 0x16E69 // MEDEFAIDRIN CAPITAL LETTER G + case 0x16E4A => 0x16E6A // MEDEFAIDRIN CAPITAL 
LETTER F + case 0x16E4B => 0x16E6B // MEDEFAIDRIN CAPITAL LETTER I + case 0x16E4C => 0x16E6C // MEDEFAIDRIN CAPITAL LETTER K + case 0x16E4D => 0x16E6D // MEDEFAIDRIN CAPITAL LETTER A + case 0x16E4E => 0x16E6E // MEDEFAIDRIN CAPITAL LETTER J + case 0x16E4F => 0x16E6F // MEDEFAIDRIN CAPITAL LETTER E + case 0x16E50 => 0x16E70 // MEDEFAIDRIN CAPITAL LETTER B + case 0x16E51 => 0x16E71 // MEDEFAIDRIN CAPITAL LETTER C + case 0x16E52 => 0x16E72 // MEDEFAIDRIN CAPITAL LETTER U + case 0x16E53 => 0x16E73 // MEDEFAIDRIN CAPITAL LETTER YU + case 0x16E54 => 0x16E74 // MEDEFAIDRIN CAPITAL LETTER L + case 0x16E55 => 0x16E75 // MEDEFAIDRIN CAPITAL LETTER Q + case 0x16E56 => 0x16E76 // MEDEFAIDRIN CAPITAL LETTER HP + case 0x16E57 => 0x16E77 // MEDEFAIDRIN CAPITAL LETTER NY + case 0x16E58 => 0x16E78 // MEDEFAIDRIN CAPITAL LETTER X + case 0x16E59 => 0x16E79 // MEDEFAIDRIN CAPITAL LETTER D + case 0x16E5A => 0x16E7A // MEDEFAIDRIN CAPITAL LETTER OE + case 0x16E5B => 0x16E7B // MEDEFAIDRIN CAPITAL LETTER N + case 0x16E5C => 0x16E7C // MEDEFAIDRIN CAPITAL LETTER R + case 0x16E5D => 0x16E7D // MEDEFAIDRIN CAPITAL LETTER O + case 0x16E5E => 0x16E7E // MEDEFAIDRIN CAPITAL LETTER AI + case 0x16E5F => 0x16E7F // MEDEFAIDRIN CAPITAL LETTER Y + case 0x1E900 => 0x1E922 // ADLAM CAPITAL LETTER ALIF + case 0x1E901 => 0x1E923 // ADLAM CAPITAL LETTER DAALI + case 0x1E902 => 0x1E924 // ADLAM CAPITAL LETTER LAAM + case 0x1E903 => 0x1E925 // ADLAM CAPITAL LETTER MIIM + case 0x1E904 => 0x1E926 // ADLAM CAPITAL LETTER BA + case 0x1E905 => 0x1E927 // ADLAM CAPITAL LETTER SINNYIIYHE + case 0x1E906 => 0x1E928 // ADLAM CAPITAL LETTER PE + case 0x1E907 => 0x1E929 // ADLAM CAPITAL LETTER BHE + case 0x1E908 => 0x1E92A // ADLAM CAPITAL LETTER RA + case 0x1E909 => 0x1E92B // ADLAM CAPITAL LETTER E + case 0x1E90A => 0x1E92C // ADLAM CAPITAL LETTER FA + case 0x1E90B => 0x1E92D // ADLAM CAPITAL LETTER I + case 0x1E90C => 0x1E92E // ADLAM CAPITAL LETTER O + case 0x1E90D => 0x1E92F // ADLAM CAPITAL LETTER DHA + case 
0x1E90E => 0x1E930 // ADLAM CAPITAL LETTER YHE + case 0x1E90F => 0x1E931 // ADLAM CAPITAL LETTER WAW + case 0x1E910 => 0x1E932 // ADLAM CAPITAL LETTER NUN + case 0x1E911 => 0x1E933 // ADLAM CAPITAL LETTER KAF + case 0x1E912 => 0x1E934 // ADLAM CAPITAL LETTER YA + case 0x1E913 => 0x1E935 // ADLAM CAPITAL LETTER U + case 0x1E914 => 0x1E936 // ADLAM CAPITAL LETTER JIIM + case 0x1E915 => 0x1E937 // ADLAM CAPITAL LETTER CHI + case 0x1E916 => 0x1E938 // ADLAM CAPITAL LETTER HA + case 0x1E917 => 0x1E939 // ADLAM CAPITAL LETTER QAAF + case 0x1E918 => 0x1E93A // ADLAM CAPITAL LETTER GA + case 0x1E919 => 0x1E93B // ADLAM CAPITAL LETTER NYA + case 0x1E91A => 0x1E93C // ADLAM CAPITAL LETTER TU + case 0x1E91B => 0x1E93D // ADLAM CAPITAL LETTER NHA + case 0x1E91C => 0x1E93E // ADLAM CAPITAL LETTER VA + case 0x1E91D => 0x1E93F // ADLAM CAPITAL LETTER KHA + case 0x1E91E => 0x1E940 // ADLAM CAPITAL LETTER GBE + case 0x1E91F => 0x1E941 // ADLAM CAPITAL LETTER ZAL + case 0x1E920 => 0x1E942 // ADLAM CAPITAL LETTER KPO + case 0x1E921 => 0x1E943 // ADLAM CAPITAL LETTER SHA + case _ => codePoint // All others map to themselves } } From dadac58eeccf8d4a46b08f67bc818ff2db27cfff Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 6 Feb 2022 09:13:35 -0700 Subject: [PATCH 07/10] Implement (Almost) All The Unicode Caseless Matching Systems The exception is identifier caseless matching because the normalization operations are not natively supported by `java.text.Normalizer` and would require even more code. This change introduces a staggering _12_ types of case folded strings. These break down to simple/full case folded strings, each with a default and Turkic variant, and then for each of those we have default, canonical, and compatibility normalization/folding. CIString is now deprecated and defers to CIStringS, which is an input remembering caseless string using default Unicode caseless matching on simple case folded strings. 
This is the most similar form to what CIString was doing before, though I think most users will actually want to use CIStringCF, which implements canonical Unicode caseless matching on full case folded strings. --- .../ci/bench/CaseFoldedStringBench.scala | 44 - .../scala/org/typelevel/ci/CIString.scala | 54 +- .../scala/org/typelevel/ci/CIStringCF.scala | 41 + .../scala/org/typelevel/ci/CIStringCS.scala | 87 + .../scala/org/typelevel/ci/CIStringS.scala | 87 + .../ci/CanonicalFullCaseFoldedString.scala | 40 + .../ci/CanonicalSimpleCaseFoldedString.scala | 90 + .../CanonicalTurkicFullCaseFoldedString.scala | 40 + ...anonicalTurkicSimpleCaseFoldedString.scala | 40 + .../org/typelevel/ci/CaseFoldedString.scala | 162 - .../scala/org/typelevel/ci/CaseFolding.scala | 2926 +++++++++-------- .../CompatibilityFullCaseFoldedString.scala | 70 + .../CompatibilitySimpleCaseFoldedString.scala | 51 + ...patibilityTurkicFullCaseFoldedString.scala | 52 + ...tibilityTurkicSimpleCaseFoldedString.scala | 52 + .../typelevel/ci/FullCaseFoldedString.scala | 34 + .../typelevel/ci/SimpleCaseFoldedString.scala | 73 + .../ci/TurkicFullCaseFoldedString.scala | 29 + .../ci/TurkicSimpleCaseFoldedString.scala | 29 + .../typelevel/ci/testing/arbitraries.scala | 33 +- .../ci/CaseFoldedStringJVMSuite.scala | 25 - .../org/typelevel/ci/CIStringSuite.scala | 3 +- .../typelevel/ci/CaseFoldedStringSuite.scala | 190 -- 23 files changed, 2388 insertions(+), 1864 deletions(-) delete mode 100644 bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CIStringCF.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CIStringCS.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CIStringS.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala create mode 100644
core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala delete mode 100644 core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/FullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/SimpleCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/TurkicFullCaseFoldedString.scala create mode 100644 core/src/main/scala/org/typelevel/ci/TurkicSimpleCaseFoldedString.scala delete mode 100644 tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala delete mode 100644 tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala diff --git a/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala b/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala deleted file mode 100644 index 082a1ac..0000000 --- a/bench/src/main/scala/com/rossabaker/ci/bench/CaseFoldedStringBench.scala +++ /dev/null @@ -1,44 +0,0 @@ -package org.typelevel.ci -package bench - -import org.scalacheck._ -import org.typelevel.ci.testing.arbitraries._ -import cats._ -import org.openjdk.jmh.annotations._ -import java.util.concurrent.TimeUnit - -@State(Scope.Thread) -@BenchmarkMode(Array(Mode.Throughput, Mode.AverageTime)) -@OutputTimeUnit(TimeUnit.MILLISECONDS) -class CaseFoldedStringBench { - - var currentSeed: Long = Long.MinValue - - def nextSeed: Long = { - val seed = currentSeed - currentSeed += 1L - seed - } - - 
def nextString: String = - Arbitrary.arbitrary[String].apply(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate String.")) - - def nextListOfString: List[String] = - Gen.listOf(Arbitrary.arbitrary[String])(Gen.Parameters.default, rng.Seed(nextSeed)).getOrElse(throw new AssertionError("Failed to generate String.")) - - @Benchmark - def caseFoldedStringHash: Int = - CaseFoldedString(nextString).hashCode - - @Benchmark - def caseFoldedStringFoldMap: CaseFoldedString = - Foldable[List].foldMap(nextListOfString)(CaseFoldedString.apply) - - @Benchmark - def stringHash: Int = - nextString.hashCode - - @Benchmark - def stringFoldMap: String = - Foldable[List].foldMap(nextListOfString)(identity) -} diff --git a/core/src/main/scala/org/typelevel/ci/CIString.scala b/core/src/main/scala/org/typelevel/ci/CIString.scala index af4ff7a..f930b25 100644 --- a/core/src/main/scala/org/typelevel/ci/CIString.scala +++ b/core/src/main/scala/org/typelevel/ci/CIString.scala @@ -22,48 +22,30 @@ import java.io.Serializable import org.typelevel.ci.compat._ import scala.math.Ordered -/** A case-insensitive String. - * - * Comparisions are based on the case folded representation of the `String` - * as defined by the Unicode standard. See [[CaseFoldedString]] for a full - * discussion on those rules. - * - * @note This class differs from [[CaseFoldedString]] in that it keeps a - * reference to original input `String` in whatever form it was - * given. This makes [[CIString]] useful if you which to perform case - * insensitive operations on a `String`, but then recover the original, - * unaltered form. If you do not care about the original input form, - * and just want a single case insensitive `String` value, then - * [[CaseFoldedString]] is more efficient and you should consider using - * that directly. - * - * @param toString - * The original value the CI String was constructed with. 
- */ -final class CIString private (override val toString: String, val asCaseFoldedString: CaseFoldedString) +@deprecated( + message = + "Please use either CIStringCF, CIStringCS, or CIStringS instead. CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", + since = "1.3.0") +final class CIString private (override val toString: String, val asCIStringS: CIStringS) extends Ordered[CIString] with Serializable { @deprecated(message = "Please provide a CaseFoldedString directly.", since = "1.3.0") - private def this(toString: String) = { - this(toString, CaseFoldedString(toString)) - } + private def this(toString: String) = + this(toString, CIStringS(toString)) override def equals(that: Any): Boolean = that match { case that: CIString => - // Note java.lang.String.equalsIgnoreCase _does not_ handle all title - // case unicode characters, so we can't use it here. See the tests for - // an example. - this.asCaseFoldedString == that.asCaseFoldedString + this.asCIStringS == that.asCIStringS case _ => false } override def hashCode(): Int = - asCaseFoldedString.hashCode + this.asCIStringS.hashCode override def compare(that: CIString): Int = - Ordering[CaseFoldedString].compare(asCaseFoldedString, that.asCaseFoldedString) + Order[CIStringS].compare(asCIStringS, that.asCIStringS) def transform(f: String => String): CIString = CIString(f(toString)) @@ -82,15 +64,17 @@ final class CIString private (override val toString: String, val asCaseFoldedStr @suppressUnusedImportWarningForCompat object CIString { - def apply(value: String, useTurkicFolding: Boolean): CIString = - new CIString(value, CaseFoldedString(value, useTurkicFolding)) - + @deprecated( + message = + "Please use either CIStringCF, CIStringCS, or CIStringS instead. 
CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", + since = "1.3.0") def apply(value: String): CIString = - apply(value, false) - - def fromCaseFoldedString(value: CaseFoldedString): CIString = - new CIString(value.toString, value) + new CIString(value, CIStringS(value)) + @deprecated( + message = + "Please use either CIStringCF, CIStringCS, or CIStringS instead. CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", + since = "1.3.0") val empty = CIString("") implicit val catsInstancesForOrgTypelevelCIString: Order[CIString] diff --git a/core/src/main/scala/org/typelevel/ci/CIStringCF.scala b/core/src/main/scala/org/typelevel/ci/CIStringCF.scala new file mode 100644 index 0000000..957013c --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CIStringCF.scala @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +final class CIStringCF private ( + override val toString: String, + val asCanonicalFullCaseFoldedString: CanonicalFullCaseFoldedString) + extends Serializable { + override def equals(that: Any): Boolean = + that match { + case that: CIStringCF => + asCanonicalFullCaseFoldedString == that.asCanonicalFullCaseFoldedString + case _ => + false + } + + override def hashCode(): Int = + asCanonicalFullCaseFoldedString.hashCode +} + +object CIStringCF { + def apply(value: String): CIStringCF = + new CIStringCF( + value, + CanonicalFullCaseFoldedString(value) + ) +} diff --git a/core/src/main/scala/org/typelevel/ci/CIStringCS.scala b/core/src/main/scala/org/typelevel/ci/CIStringCS.scala new file mode 100644 index 0000000..96ae00d --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CIStringCS.scala @@ -0,0 +1,87 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import cats._ +import cats.kernel._ +import cats.syntax.all._ + +final class CIStringCS private ( + override val toString: String, + val asCanonicalSimpleCaseFoldedString: CanonicalSimpleCaseFoldedString) + extends Serializable { + + override def equals(that: Any): Boolean = + that match { + case that: CIStringCS => + asCanonicalSimpleCaseFoldedString == that.asCanonicalSimpleCaseFoldedString + case _ => + false + } + + override def hashCode(): Int = + asCanonicalSimpleCaseFoldedString.hashCode +} + +object CIStringCS { + + def apply(value: String): CIStringCS = + new CIStringCS( + value, + CanonicalSimpleCaseFoldedString(value) + ) + + val empty: CIStringCS = apply("") + + implicit val hashAndOrderForCIStringCS: Hash[CIStringCS] with Order[CIStringCS] = + new Hash[CIStringCS] with Order[CIStringCS] { + override def hash(x: CIStringCS): Int = + x.hashCode + + override def compare(x: CIStringCS, y: CIStringCS): Int = + x.asCanonicalSimpleCaseFoldedString.compare(y.asCanonicalSimpleCaseFoldedString) + } + + implicit val orderingForCIStringCS: Ordering[CIStringCS] = + hashAndOrderForCIStringCS.toOrdering + + implicit val showForCIStringCS: Show[CIStringCS] = + Show.fromToString + + implicit val lowerBoundForCIStringCS: LowerBounded[CIStringCS] = + new LowerBounded[CIStringCS] { + override val partialOrder: PartialOrder[CIStringCS] = + hashAndOrderForCIStringCS + + override val minBound: CIStringCS = + empty + } + + implicit val monoidForCIStringCS: Monoid[CIStringCS] = + new Monoid[CIStringCS] { + override val empty: CIStringCS = CIStringCS.empty + + override def combine(x: CIStringCS, y: CIStringCS): CIStringCS = + CIStringCS(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[CIStringCS]): CIStringCS = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + CIStringCS(sb.toString) + } + } +} diff --git a/core/src/main/scala/org/typelevel/ci/CIStringS.scala 
b/core/src/main/scala/org/typelevel/ci/CIStringS.scala new file mode 100644 index 0000000..e92eb44 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CIStringS.scala @@ -0,0 +1,87 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.typelevel.ci + +import cats._ +import cats.kernel._ +import cats.syntax.all._ + +final class CIStringS private ( + override val toString: String, + val asSimpleCaseFoldedString: SimpleCaseFoldedString) + extends Serializable { + + override def equals(that: Any): Boolean = + that match { + case that: CIStringS => + asSimpleCaseFoldedString == that.asSimpleCaseFoldedString + case _ => + false + } + + override def hashCode(): Int = + asSimpleCaseFoldedString.hashCode +} + +object CIStringS { + + def apply(value: String): CIStringS = + new CIStringS( + value, + SimpleCaseFoldedString(value) + ) + + val empty: CIStringS = apply("") + + implicit val hashAndOrderForCIStringS: Hash[CIStringS] with Order[CIStringS] = + new Hash[CIStringS] with Order[CIStringS] { + override def hash(x: CIStringS): Int = + x.hashCode + + override def compare(x: CIStringS, y: CIStringS): Int = + x.asSimpleCaseFoldedString.compare(y.asSimpleCaseFoldedString) + } + + implicit val orderingForCIStringS: Ordering[CIStringS] = + hashAndOrderForCIStringS.toOrdering + + implicit val showForCIStringS: Show[CIStringS] = + Show.fromToString + + implicit val lowerBoundForCIStringS: LowerBounded[CIStringS] = + new 
LowerBounded[CIStringS] { + override val partialOrder: PartialOrder[CIStringS] = + hashAndOrderForCIStringS + + override val minBound: CIStringS = + empty + } + + implicit val monoidForCIStringS: Monoid[CIStringS] = + new Monoid[CIStringS] { + override val empty: CIStringS = CIStringS.empty + + override def combine(x: CIStringS, y: CIStringS): CIStringS = + CIStringS(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[CIStringS]): CIStringS = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + CIStringS(sb.toString) + } + } +} diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala new file mode 100644 index 0000000..218365a --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import java.text.Normalizer +import scala.annotation.tailrec + +final case class CanonicalFullCaseFoldedString private (override val toString: String) + extends AnyVal + +object CanonicalFullCaseFoldedString { + def apply(value: String): CanonicalFullCaseFoldedString = + new CanonicalFullCaseFoldedString( + Normalizer.normalize( + if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { + CaseFolding.fullCaseFoldString(value) + } else { + CaseFolding.fullCaseFoldString(Normalizer.normalize(value, Normalizer.Form.NFD)) + }, + Normalizer.Form.NFD + ) + ) + + val empty: CanonicalFullCaseFoldedString = + apply("") +} diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala new file mode 100644 index 0000000..b048100 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala @@ -0,0 +1,90 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import cats._ +import cats.kernel._ +import cats.syntax.all._ +import java.text.Normalizer +import scala.annotation.tailrec + +final case class CanonicalSimpleCaseFoldedString private (override val toString: String) + extends AnyVal + +object CanonicalSimpleCaseFoldedString { + + def apply(value: String): CanonicalSimpleCaseFoldedString = + new CanonicalSimpleCaseFoldedString( + Normalizer.normalize( + if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { + CaseFolding.simpleCaseFoldString(value) + } else { + CaseFolding.simpleCaseFoldString(Normalizer.normalize(value, Normalizer.Form.NFD)) + }, + Normalizer.Form.NFD + ) + ) + + val empty: CanonicalSimpleCaseFoldedString = + apply("") + + implicit val hashAndOrderForCanonicalSimpleCaseFoldedString + : Hash[CanonicalSimpleCaseFoldedString] with Order[CanonicalSimpleCaseFoldedString] = + new Hash[CanonicalSimpleCaseFoldedString] with Order[CanonicalSimpleCaseFoldedString] { + override def hash(x: CanonicalSimpleCaseFoldedString): Int = + x.hashCode + + override def compare( + x: CanonicalSimpleCaseFoldedString, + y: CanonicalSimpleCaseFoldedString): Int = + x.toString.compare(y.toString) + } + + implicit val orderingForCanonicalSimpleCaseFoldedString + : Ordering[CanonicalSimpleCaseFoldedString] = + hashAndOrderForCanonicalSimpleCaseFoldedString.toOrdering + + implicit val showForCanonicalSimpleCaseFoldedString: Show[CanonicalSimpleCaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForCanonicalSimpleCaseFoldedString + : LowerBounded[CanonicalSimpleCaseFoldedString] = + new LowerBounded[CanonicalSimpleCaseFoldedString] { + override val partialOrder: PartialOrder[CanonicalSimpleCaseFoldedString] = + hashAndOrderForCanonicalSimpleCaseFoldedString + + override val minBound: CanonicalSimpleCaseFoldedString = + empty + } + + implicit val monoidForCanonicalSimpleCaseFoldedString: Monoid[CanonicalSimpleCaseFoldedString] = + new Monoid[CanonicalSimpleCaseFoldedString] { 
+ override val empty: CanonicalSimpleCaseFoldedString = CanonicalSimpleCaseFoldedString.empty + + override def combine( + x: CanonicalSimpleCaseFoldedString, + y: CanonicalSimpleCaseFoldedString): CanonicalSimpleCaseFoldedString = + CanonicalSimpleCaseFoldedString(x.toString + y.toString) + + override def combineAll( + xs: IterableOnce[CanonicalSimpleCaseFoldedString]): CanonicalSimpleCaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + CanonicalSimpleCaseFoldedString(sb.toString) + } + } +} diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala new file mode 100644 index 0000000..8b6b9d3 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import java.text.Normalizer +import scala.annotation.tailrec + +final case class CanonicalTurkicFullCaseFoldedString private (override val toString: String) + extends AnyVal + +object CanonicalTurkicFullCaseFoldedString { + def apply(value: String): CanonicalTurkicFullCaseFoldedString = + new CanonicalTurkicFullCaseFoldedString( + Normalizer.normalize( + if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { + CaseFolding.turkicFullCaseFoldString(value) + } else { + CaseFolding.turkicFullCaseFoldString(Normalizer.normalize(value, Normalizer.Form.NFD)) + }, + Normalizer.Form.NFD + ) + ) + + val empty: CanonicalTurkicFullCaseFoldedString = + apply("") +} diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala new file mode 100644 index 0000000..cf0d00b --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import java.text.Normalizer +import scala.annotation.tailrec + +final case class CanonicalTurkicSimpleCaseFoldedString private (override val toString: String) + extends AnyVal + +object CanonicalTurkicSimpleCaseFoldedString { + def apply(value: String): CanonicalTurkicSimpleCaseFoldedString = + new CanonicalTurkicSimpleCaseFoldedString( + Normalizer.normalize( + if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { + CaseFolding.turkicSimpleCaseFoldString(value) + } else { + CaseFolding.turkicSimpleCaseFoldString(Normalizer.normalize(value, Normalizer.Form.NFD)) + }, + Normalizer.Form.NFD + ) + ) + + val empty: CanonicalTurkicSimpleCaseFoldedString = + apply("") +} diff --git a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala deleted file mode 100644 index 00a980f..0000000 --- a/core/src/main/scala/org/typelevel/ci/CaseFoldedString.scala +++ /dev/null @@ -1,162 +0,0 @@ -package org.typelevel.ci - -import cats._ -import cats.kernel.LowerBounded -import org.typelevel.ci.compat._ -import scala.annotation.tailrec - -/** A case folded `String`. This is a `String` which has been converted into a - * state which is suitable for case insensitive matching under the Unicode - * standard. - * - * This type differs from [[CIString]] in that it does ''not'' retain the - * original input `String` value. That is, this is a destructive - * transformation. You should use [[CaseFoldedString]] instead of - * [[CIString]] when you only want the case insensitive `String` and you - * never want to return the `String` back into the input value. In such cases - * [[CaseFoldedString]] will be more efficient than [[CIString]] as it only - * has to keep around a single `String` in memory. - * - * Case insensitive `String` values under Unicode are not always intuitive, - * especially on the JVM. 
There are three character cases to consider, lower - * case, upper case, and title case, and not all Unicode codePoints have all - * 3, some only have 2, some only 1. For some codePoints, the JRE standard - * operations don't always work as you'd expect. - * - * {{{ - * scala> val codePoint: Int = 8093 - * val codePoint: Int = 8093 - * - * scala> new String(Character.toChars(codePoint)) - * val res0: String = ᾝ - * - * scala> res0.toUpperCase - * val res1: String = ἭΙ - * - * scala> res0.toUpperCase.toLowerCase == res0.toLowerCase - * val res2: Boolean = false - * - * scala> Character.getName(res0.head) - * val res3: String = GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI - * - * scala> res0.toUpperCase.toLowerCase.equalsIgnoreCase(res0.toLowerCase) - * val res4: Boolean = false - * }}} - * - * In this example, given the Unicode character \u1f9d, converting it to - * upper case, then to lower case, is not equal under normal String - * equality. `String.equalsIgnoreCase` also does not work correctly by the - * Unicode standard. - * - * Making matters more complicated, for certain Turkic languages, the case - * folding rules change. See the Unicode standard for a full discussion of - * the topic. - * - * @note For most `String` values the `toString` form of this is lower case - * (when the given character has more than one case), but this is not - * always the case. Certain Unicode scripts have exceptions to this and - * will be case folded into upper case. If you want/need an only lower - * case `String`, you should call `.toString.toLowerCase`. 
- * - * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790]] - */ -final case class CaseFoldedString private (override val toString: String) extends AnyVal { - - def isEmpty: Boolean = toString.isEmpty - - def nonEmpty: Boolean = !isEmpty - - def length: Int = toString.length - - def size: Int = length - - def trim: CaseFoldedString = - CaseFoldedString(toString.trim) - - private final def copy(toString: String): CaseFoldedString = - CaseFoldedString(toString) -} - -object CaseFoldedString { - - /** Create a [[CaseFoldedString]] from a `String`. - * - * @param turkicFoldingRules if `true`, use the case folding rules for - * applicable to some Turkic languages. - */ - def apply(value: String, turkicFoldingRules: Boolean): CaseFoldedString = { - val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) - val foldCodePoint: Int => Array[Int] = - if (turkicFoldingRules) { - CaseFolding.turkicFullCaseFoldedCodePoints - } else { - CaseFolding.fullCaseFoldedCodePoints - } - - @tailrec - def loop(index: Int): String = - if (index >= value.length) { - builder.toString - } else { - val codePoint: Int = value.codePointAt(index) - foldCodePoint(codePoint).foreach(c => builder.appendCodePoint(c)) - val inc: Int = if (codePoint >= 0x10000) 2 else 1 - loop(index + inc) - } - - new CaseFoldedString(loop(0)) - } - - /** Create a [[CaseFoldedString]] from a `String`. - * - * @note This factory method does ''not'' use the Turkic case folding - * rules. For the majority of languages this is the correct method of - * case folding. If you know your `String` is specific to one of the - * Turkic languages which use special case folding rules, you can use - * the secondary factory method to enable case folding under those - * rules. 
- */ - def apply(value: String): CaseFoldedString = - apply(value, false) - - val empty: CaseFoldedString = - CaseFoldedString("") - - implicit val hashAndOrderForCaseFoldedString: Hash[CaseFoldedString] with Order[CaseFoldedString] = - new Hash[CaseFoldedString] with Order[CaseFoldedString] { - override def hash(x: CaseFoldedString): Int = - x.hashCode - - override def compare(x: CaseFoldedString, y: CaseFoldedString): Int = - x.toString.compare(y.toString) - } - - implicit val orderingForCaseFoldedString: Ordering[CaseFoldedString] = - hashAndOrderForCaseFoldedString.toOrdering - - implicit val showForCaseFoldedString: Show[CaseFoldedString] = - Show.fromToString - - implicit val lowerBoundForCaseFoldedString: LowerBounded[CaseFoldedString] = - new LowerBounded[CaseFoldedString] { - override val partialOrder: PartialOrder[CaseFoldedString] = - hashAndOrderForCaseFoldedString - - override val minBound: CaseFoldedString = - empty - } - - implicit val monoidForCaseFoldedString: Monoid[CaseFoldedString] = - new Monoid[CaseFoldedString] { - override val empty: CaseFoldedString = CaseFoldedString.empty - - override def combine(x: CaseFoldedString, y: CaseFoldedString): CaseFoldedString = - new CaseFoldedString(x.toString + y.toString) - - override def combineAll(xs: IterableOnce[CaseFoldedString]): CaseFoldedString = { - val sb: StringBuilder = new StringBuilder - xs.iterator.foreach(cfs => sb.append(cfs.toString)) - new CaseFoldedString(sb.toString) - } - } -} diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala index 00a71de..72d1674 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala @@ -1,14 +1,34 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.typelevel.ci -/** These are lookup tables for case folding. There are several different case - * folding algorithms which can be employed with different trade offs. +import scala.annotation.tailrec + +/** These are lookup tables for case folding. There are several different case folding algorithms + * which can be employed with different trade offs. * - * @note Some case folding, in particular full case folding, can yield more - * codePoints than the original value. That is, it can ''increase'' the - * size of `String` values once folded. + * @note + * Some case folding, in particular full case folding, can yield more codePoints than the + * original value. That is, it can ''increase'' the size of `String` values once folded. * - * @see [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790 Caseless Matching]] - * @see [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch05.pdf#G21790 Caseless Matching]] + * @see + * [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] */ private[ci] object CaseFolding { @@ -24,9 +44,76 @@ private[ci] object CaseFolding { // 66: 12263 // }}} - /** This function transforms a Unicode codePoint into it's full case folded - * variant, with the rule changes which are applicable to ''some'' Turkic - * languages. 
+ def fullCaseFoldString(value: String): String = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + fullCaseFoldedCodePoints(codePoint).foreach(c => builder.appendCodePoint(c)) + val inc: Int = if (codePoint >= 0x10000) 2 else 1 + loop(index + inc) + } + + loop(0) + } + + def turkicFullCaseFoldString(value: String): String = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + turkicFullCaseFoldedCodePoints(codePoint).foreach(c => builder.appendCodePoint(c)) + val inc: Int = if (codePoint >= 0x10000) 2 else 1 + loop(index + inc) + } + + loop(0) + } + + def simpleCaseFoldString(value: String): String = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + builder.appendCodePoint(simpleCaseFoldedCodePoints(codePoint)) + val inc: Int = if (codePoint >= 0x10000) 2 else 1 + loop(index + inc) + } + + loop(0) + } + + def turkicSimpleCaseFoldString(value: String): String = { + val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) + + @tailrec + def loop(index: Int): String = + if (index >= value.length) { + builder.toString + } else { + val codePoint: Int = value.codePointAt(index) + builder.appendCodePoint(turkicSimpleCaseFoldedCodePoints(codePoint)) + val inc: Int = if (codePoint >= 0x10000) 2 else 1 + loop(index + inc) + } + + loop(0) + } + + /** This function transforms a Unicode codePoint into its full case folded variant, with the rule + * changes which are
applicable to ''some'' Turkic languages. * * For other languages these rules should not be applied. */ @@ -38,9 +125,8 @@ private[ci] object CaseFolding { fullCaseFoldedCodePoints(codePoint) } - /** This function transforms a Unicode codePoint into it's simple case folded - * variant, with the rule changes which are applicable to ''some'' Turkic - * languages. + /** This function transforms a Unicode codePoint into its simple case folded variant, with the + * rule changes which are applicable to ''some'' Turkic languages. * * For other languages these rules should not be applied. */ @@ -52,165 +138,253 @@ private[ci] object CaseFolding { simpleCaseFoldedCodePoints(codePoint) } - /** This function transforms a Unicode codePoint into it's full case folded - * variant using the default rules. + /** This function transforms a Unicode codePoint into its full case folded variant using the + * default rules. * * It is equivalent to the "C + F" rules from `CaseFolding.txt`. */ def fullCaseFoldedCodePoints(codePoint: Int): Array[Int] = codePoint match { - case 0x00DF => Array(0x0073, 0x0073) // LATIN SMALL LETTER SHARP S + case 0x00df => Array(0x0073, 0x0073) // LATIN SMALL LETTER SHARP S case 0x0130 => Array(0x0069, 0x0307) // LATIN CAPITAL LETTER I WITH DOT ABOVE - case 0x0149 => Array(0x02BC, 0x006E) // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE - case 0x01F0 => Array(0x006A, 0x030C) // LATIN SMALL LETTER J WITH CARON - case 0x0390 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - case 0x03B0 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS + case 0x0149 => Array(0x02bc, 0x006e) // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE + case 0x01f0 => Array(0x006a, 0x030c) // LATIN SMALL LETTER J WITH CARON + case 0x0390 => + Array(0x03b9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS + case 0x03b0 => + Array(0x03c5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND
TONOS case 0x0587 => Array(0x0565, 0x0582) // ARMENIAN SMALL LIGATURE ECH YIWN - case 0x1E96 => Array(0x0068, 0x0331) // LATIN SMALL LETTER H WITH LINE BELOW - case 0x1E97 => Array(0x0074, 0x0308) // LATIN SMALL LETTER T WITH DIAERESIS - case 0x1E98 => Array(0x0077, 0x030A) // LATIN SMALL LETTER W WITH RING ABOVE - case 0x1E99 => Array(0x0079, 0x030A) // LATIN SMALL LETTER Y WITH RING ABOVE - case 0x1E9A => Array(0x0061, 0x02BE) // LATIN SMALL LETTER A WITH RIGHT HALF RING - case 0x1E9E => Array(0x0073, 0x0073) // LATIN CAPITAL LETTER SHARP S - case 0x1F50 => Array(0x03C5, 0x0313) // GREEK SMALL LETTER UPSILON WITH PSILI - case 0x1F52 => Array(0x03C5, 0x0313, 0x0300) // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA - case 0x1F54 => Array(0x03C5, 0x0313, 0x0301) // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA - case 0x1F56 => Array(0x03C5, 0x0313, 0x0342) // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI - case 0x1F80 => Array(0x1F00, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI - case 0x1F81 => Array(0x1F01, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI - case 0x1F82 => Array(0x1F02, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI - case 0x1F83 => Array(0x1F03, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI - case 0x1F84 => Array(0x1F04, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI - case 0x1F85 => Array(0x1F05, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI - case 0x1F86 => Array(0x1F06, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1F87 => Array(0x1F07, 0x03B9) // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1F88 => Array(0x1F00, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - case 0x1F89 => Array(0x1F01, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI - case 0x1F8A => Array(0x1F02, 
0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1F8B => Array(0x1F03, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1F8C => Array(0x1F04, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1F8D => Array(0x1F05, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1F8E => Array(0x1F06, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F8F => Array(0x1F07, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F90 => Array(0x1F20, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI - case 0x1F91 => Array(0x1F21, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI - case 0x1F92 => Array(0x1F22, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI - case 0x1F93 => Array(0x1F23, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI - case 0x1F94 => Array(0x1F24, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI - case 0x1F95 => Array(0x1F25, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI - case 0x1F96 => Array(0x1F26, 0x03B9) // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1F97 => Array(0x1F27, 0x03B9) // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1F98 => Array(0x1F20, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI - case 0x1F99 => Array(0x1F21, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI - case 0x1F9A => Array(0x1F22, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1F9B => Array(0x1F23, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1F9C => Array(0x1F24, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1F9D => Array(0x1F25, 
0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1F9E => Array(0x1F26, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F9F => Array(0x1F27, 0x03B9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1FA0 => Array(0x1F60, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI - case 0x1FA1 => Array(0x1F61, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI - case 0x1FA2 => Array(0x1F62, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI - case 0x1FA3 => Array(0x1F63, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI - case 0x1FA4 => Array(0x1F64, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI - case 0x1FA5 => Array(0x1F65, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI - case 0x1FA6 => Array(0x1F66, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1FA7 => Array(0x1F67, 0x03B9) // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI - case 0x1FA8 => Array(0x1F60, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI - case 0x1FA9 => Array(0x1F61, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI - case 0x1FAA => Array(0x1F62, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1FAB => Array(0x1F63, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1FAC => Array(0x1F64, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1FAD => Array(0x1F65, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1FAE => Array(0x1F66, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1FAF => Array(0x1F67, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI - case 0x1FB2 => Array(0x1F70, 0x03B9) // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI - case 0x1FB3 => Array(0x03B1, 0x03B9) // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI - case 0x1FB4 => Array(0x03AC, 0x03B9) // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI - case 0x1FB6 => Array(0x03B1, 0x0342) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI - case 0x1FB7 => Array(0x03B1, 0x0342, 0x03B9) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI - case 0x1FBC => Array(0x03B1, 0x03B9) // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI - case 0x1FC2 => Array(0x1F74, 0x03B9) // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI - case 0x1FC3 => Array(0x03B7, 0x03B9) // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI - case 0x1FC4 => Array(0x03AE, 0x03B9) // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI - case 0x1FC6 => Array(0x03B7, 0x0342) // GREEK SMALL LETTER ETA WITH PERISPOMENI - case 0x1FC7 => Array(0x03B7, 0x0342, 0x03B9) // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI - case 0x1FCC => Array(0x03B7, 0x03B9) // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI - case 0x1FD2 => Array(0x03B9, 0x0308, 0x0300) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA - case 0x1FD3 => Array(0x03B9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA - case 0x1FD6 => Array(0x03B9, 0x0342) // GREEK SMALL LETTER IOTA WITH PERISPOMENI - case 0x1FD7 => Array(0x03B9, 0x0308, 0x0342) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI - case 0x1FE2 => Array(0x03C5, 0x0308, 0x0300) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA - case 0x1FE3 => Array(0x03C5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA - case 0x1FE4 => Array(0x03C1, 0x0313) // GREEK SMALL LETTER RHO WITH PSILI - case 0x1FE6 => Array(0x03C5, 0x0342) // GREEK SMALL LETTER UPSILON WITH PERISPOMENI - case 0x1FE7 => Array(0x03C5, 0x0308, 0x0342) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI - case 0x1FF2 => 
Array(0x1F7C, 0x03B9) // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI - case 0x1FF3 => Array(0x03C9, 0x03B9) // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI - case 0x1FF4 => Array(0x03CE, 0x03B9) // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI - case 0x1FF6 => Array(0x03C9, 0x0342) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI - case 0x1FF7 => Array(0x03C9, 0x0342, 0x03B9) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI - case 0x1FFC => Array(0x03C9, 0x03B9) // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI - case 0xFB00 => Array(0x0066, 0x0066) // LATIN SMALL LIGATURE FF - case 0xFB01 => Array(0x0066, 0x0069) // LATIN SMALL LIGATURE FI - case 0xFB02 => Array(0x0066, 0x006C) // LATIN SMALL LIGATURE FL - case 0xFB03 => Array(0x0066, 0x0066, 0x0069) // LATIN SMALL LIGATURE FFI - case 0xFB04 => Array(0x0066, 0x0066, 0x006C) // LATIN SMALL LIGATURE FFL - case 0xFB05 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE LONG S T - case 0xFB06 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE ST - case 0xFB13 => Array(0x0574, 0x0576) // ARMENIAN SMALL LIGATURE MEN NOW - case 0xFB14 => Array(0x0574, 0x0565) // ARMENIAN SMALL LIGATURE MEN ECH - case 0xFB15 => Array(0x0574, 0x056B) // ARMENIAN SMALL LIGATURE MEN INI - case 0xFB16 => Array(0x057E, 0x0576) // ARMENIAN SMALL LIGATURE VEW NOW - case 0xFB17 => Array(0x0574, 0x056D) // ARMENIAN SMALL LIGATURE MEN XEH + case 0x1e96 => Array(0x0068, 0x0331) // LATIN SMALL LETTER H WITH LINE BELOW + case 0x1e97 => Array(0x0074, 0x0308) // LATIN SMALL LETTER T WITH DIAERESIS + case 0x1e98 => Array(0x0077, 0x030a) // LATIN SMALL LETTER W WITH RING ABOVE + case 0x1e99 => Array(0x0079, 0x030a) // LATIN SMALL LETTER Y WITH RING ABOVE + case 0x1e9a => Array(0x0061, 0x02be) // LATIN SMALL LETTER A WITH RIGHT HALF RING + case 0x1e9e => Array(0x0073, 0x0073) // LATIN CAPITAL LETTER SHARP S + case 0x1f50 => Array(0x03c5, 0x0313) // GREEK SMALL LETTER UPSILON WITH PSILI + case 0x1f52 => + Array(0x03c5, 0x0313, 0x0300) 
// GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + case 0x1f54 => Array(0x03c5, 0x0313, 0x0301) // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + case 0x1f56 => + Array(0x03c5, 0x0313, 0x0342) // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + case 0x1f80 => Array(0x1f00, 0x03b9) // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI + case 0x1f81 => Array(0x1f01, 0x03b9) // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + case 0x1f82 => + Array(0x1f02, 0x03b9) // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1f83 => + Array(0x1f03, 0x03b9) // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1f84 => + Array(0x1f04, 0x03b9) // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1f85 => + Array(0x1f05, 0x03b9) // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1f86 => + Array( + 0x1f06, + 0x03b9 + ) // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1f87 => + Array( + 0x1f07, + 0x03b9 + ) // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1f88 => + Array(0x1f00, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1f89 => + Array(0x1f01, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1f8a => + Array(0x1f02, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1f8b => + Array(0x1f03, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1f8c => + Array(0x1f04, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1f8d => + Array(0x1f05, 0x03b9) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1f8e => + Array( + 0x1f06, + 0x03b9 + ) // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1f8f => + Array( + 0x1f07, + 0x03b9 + ) // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI + case 0x1f90 => Array(0x1f20, 0x03b9) // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + case 0x1f91 => Array(0x1f21, 0x03b9) // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + case 0x1f92 => + Array(0x1f22, 0x03b9) // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + case 0x1f93 => + Array(0x1f23, 0x03b9) // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1f94 => + Array(0x1f24, 0x03b9) // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1f95 => + Array(0x1f25, 0x03b9) // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1f96 => + Array(0x1f26, 0x03b9) // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1f97 => + Array(0x1f27, 0x03b9) // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1f98 => Array(0x1f20, 0x03b9) // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1f99 => Array(0x1f21, 0x03b9) // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1f9a => + Array(0x1f22, 0x03b9) // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1f9b => + Array(0x1f23, 0x03b9) // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1f9c => + Array(0x1f24, 0x03b9) // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1f9d => + Array(0x1f25, 0x03b9) // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1f9e => + Array( + 0x1f26, + 0x03b9 + ) // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1f9f => + Array( + 0x1f27, + 0x03b9 + ) // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1fa0 => Array(0x1f60, 0x03b9) // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + case 0x1fa1 => Array(0x1f61, 0x03b9) // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + case 0x1fa2 => + Array(0x1f62, 0x03b9) // GREEK SMALL LETTER OMEGA WITH PSILI AND 
VARIA AND YPOGEGRAMMENI + case 0x1fa3 => + Array(0x1f63, 0x03b9) // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + case 0x1fa4 => + Array(0x1f64, 0x03b9) // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + case 0x1fa5 => + Array(0x1f65, 0x03b9) // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + case 0x1fa6 => + Array( + 0x1f66, + 0x03b9 + ) // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1fa7 => + Array( + 0x1f67, + 0x03b9 + ) // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + case 0x1fa8 => + Array(0x1f60, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1fa9 => + Array(0x1f61, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1faa => + Array(0x1f62, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1fab => + Array(0x1f63, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1fac => + Array(0x1f64, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1fad => + Array(0x1f65, 0x03b9) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1fae => + Array( + 0x1f66, + 0x03b9 + ) // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1faf => + Array( + 0x1f67, + 0x03b9 + ) // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1fb2 => Array(0x1f70, 0x03b9) // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + case 0x1fb3 => Array(0x03b1, 0x03b9) // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI + case 0x1fb4 => Array(0x03ac, 0x03b9) // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + case 0x1fb6 => Array(0x03b1, 0x0342) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + case 0x1fb7 => + Array(0x03b1, 0x0342, 0x03b9) // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1fbc => Array(0x03b1, 0x03b9) // 
GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1fc2 => Array(0x1f74, 0x03b9) // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + case 0x1fc3 => Array(0x03b7, 0x03b9) // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + case 0x1fc4 => Array(0x03ae, 0x03b9) // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + case 0x1fc6 => Array(0x03b7, 0x0342) // GREEK SMALL LETTER ETA WITH PERISPOMENI + case 0x1fc7 => + Array(0x03b7, 0x0342, 0x03b9) // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1fcc => Array(0x03b7, 0x03b9) // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1fd2 => + Array(0x03b9, 0x0308, 0x0300) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + case 0x1fd3 => + Array(0x03b9, 0x0308, 0x0301) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + case 0x1fd6 => Array(0x03b9, 0x0342) // GREEK SMALL LETTER IOTA WITH PERISPOMENI + case 0x1fd7 => + Array(0x03b9, 0x0308, 0x0342) // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + case 0x1fe2 => + Array(0x03c5, 0x0308, 0x0300) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + case 0x1fe3 => + Array(0x03c5, 0x0308, 0x0301) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + case 0x1fe4 => Array(0x03c1, 0x0313) // GREEK SMALL LETTER RHO WITH PSILI + case 0x1fe6 => Array(0x03c5, 0x0342) // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + case 0x1fe7 => + Array(0x03c5, 0x0308, 0x0342) // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + case 0x1ff2 => Array(0x1f7c, 0x03b9) // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + case 0x1ff3 => Array(0x03c9, 0x03b9) // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + case 0x1ff4 => Array(0x03ce, 0x03b9) // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + case 0x1ff6 => Array(0x03c9, 0x0342) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + case 0x1ff7 => + Array(0x03c9, 0x0342, 0x03b9) // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + case 0x1ffc => Array(0x03c9, 0x03b9) // GREEK CAPITAL 
LETTER OMEGA WITH PROSGEGRAMMENI + case 0xfb00 => Array(0x0066, 0x0066) // LATIN SMALL LIGATURE FF + case 0xfb01 => Array(0x0066, 0x0069) // LATIN SMALL LIGATURE FI + case 0xfb02 => Array(0x0066, 0x006c) // LATIN SMALL LIGATURE FL + case 0xfb03 => Array(0x0066, 0x0066, 0x0069) // LATIN SMALL LIGATURE FFI + case 0xfb04 => Array(0x0066, 0x0066, 0x006c) // LATIN SMALL LIGATURE FFL + case 0xfb05 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE LONG S T + case 0xfb06 => Array(0x0073, 0x0074) // LATIN SMALL LIGATURE ST + case 0xfb13 => Array(0x0574, 0x0576) // ARMENIAN SMALL LIGATURE MEN NOW + case 0xfb14 => Array(0x0574, 0x0565) // ARMENIAN SMALL LIGATURE MEN ECH + case 0xfb15 => Array(0x0574, 0x056b) // ARMENIAN SMALL LIGATURE MEN INI + case 0xfb16 => Array(0x057e, 0x0576) // ARMENIAN SMALL LIGATURE VEW NOW + case 0xfb17 => Array(0x0574, 0x056d) // ARMENIAN SMALL LIGATURE MEN XEH case _ => // The full rules defer to the common rules Array(commonCaseFoldedCodePoints(codePoint)) } - /** This function transforms a Unicode codePoint into it's simple case folded - * variant using the default rules. + /** This function transforms a Unicode codePoint into it's simple case folded variant using the + * default rules. * * It is equivalent to the "C + S" rules from `CaseFolding.txt`. 
*/ def simpleCaseFoldedCodePoints(codePoint: Int): Int = codePoint match { - case 0x1E9E => 0x00DF // LATIN CAPITAL LETTER SHARP S - case 0x1F88 => 0x1F80 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - case 0x1F89 => 0x1F81 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI - case 0x1F8A => 0x1F82 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1F8B => 0x1F83 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1F8C => 0x1F84 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1F8D => 0x1F85 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1F8E => 0x1F86 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F8F => 0x1F87 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F98 => 0x1F90 // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI - case 0x1F99 => 0x1F91 // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI - case 0x1F9A => 0x1F92 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1F9B => 0x1F93 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1F9C => 0x1F94 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1F9D => 0x1F95 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1F9E => 0x1F96 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1F9F => 0x1F97 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1FA8 => 0x1FA0 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI - case 0x1FA9 => 0x1FA1 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI - case 0x1FAA => 0x1FA2 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI - case 0x1FAB => 0x1FA3 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI - case 0x1FAC => 0x1FA4 // GREEK CAPITAL 
LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI - case 0x1FAD => 0x1FA5 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI - case 0x1FAE => 0x1FA6 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1FAF => 0x1FA7 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI - case 0x1FBC => 0x1FB3 // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI - case 0x1FCC => 0x1FC3 // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI - case 0x1FFC => 0x1FF3 // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + case 0x1e9e => 0x00df // LATIN CAPITAL LETTER SHARP S + case 0x1f88 => 0x1f80 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + case 0x1f89 => 0x1f81 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + case 0x1f8a => 0x1f82 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1f8b => 0x1f83 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1f8c => 0x1f84 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1f8d => 0x1f85 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1f8e => + 0x1f86 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1f8f => + 0x1f87 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1f98 => 0x1f90 // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + case 0x1f99 => 0x1f91 // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + case 0x1f9a => 0x1f92 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1f9b => 0x1f93 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1f9c => 0x1f94 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1f9d => 0x1f95 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1f9e => + 0x1f96 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + 
case 0x1f9f => + 0x1f97 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1fa8 => 0x1fa0 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + case 0x1fa9 => 0x1fa1 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + case 0x1faa => 0x1fa2 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + case 0x1fab => 0x1fa3 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + case 0x1fac => 0x1fa4 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + case 0x1fad => 0x1fa5 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + case 0x1fae => + 0x1fa6 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1faf => + 0x1fa7 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + case 0x1fbc => 0x1fb3 // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + case 0x1fcc => 0x1fc3 // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + case 0x1ffc => 0x1ff3 // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI case _ => commonCaseFoldedCodePoints(codePoint) } - /** This function transforms a Unicode codePoint into it's common case folded - * form. + /** This function transforms a Unicode codePoint into it's common case folded form. * - * This lookup can only be validly used in concert with either the simple - * or full case folding rules (with or without the special cases for some - * Turkic languages). This is why this function is `private`. + * This lookup can only be validly used in concert with either the simple or full case folding + * rules (with or without the special cases for some Turkic languages). This is why this function + * is `private`. 
*/ private def commonCaseFoldedCodePoints(codePoint: Int): Int = codePoint match { @@ -223,12 +397,12 @@ private[ci] object CaseFolding { case 0x0047 => 0x0067 // LATIN CAPITAL LETTER G case 0x0048 => 0x0068 // LATIN CAPITAL LETTER H case 0x0049 => 0x0069 // LATIN CAPITAL LETTER I - case 0x004A => 0x006A // LATIN CAPITAL LETTER J - case 0x004B => 0x006B // LATIN CAPITAL LETTER K - case 0x004C => 0x006C // LATIN CAPITAL LETTER L - case 0x004D => 0x006D // LATIN CAPITAL LETTER M - case 0x004E => 0x006E // LATIN CAPITAL LETTER N - case 0x004F => 0x006F // LATIN CAPITAL LETTER O + case 0x004a => 0x006a // LATIN CAPITAL LETTER J + case 0x004b => 0x006b // LATIN CAPITAL LETTER K + case 0x004c => 0x006c // LATIN CAPITAL LETTER L + case 0x004d => 0x006d // LATIN CAPITAL LETTER M + case 0x004e => 0x006e // LATIN CAPITAL LETTER N + case 0x004f => 0x006f // LATIN CAPITAL LETTER O case 0x0050 => 0x0070 // LATIN CAPITAL LETTER P case 0x0051 => 0x0071 // LATIN CAPITAL LETTER Q case 0x0052 => 0x0072 // LATIN CAPITAL LETTER R @@ -239,274 +413,274 @@ private[ci] object CaseFolding { case 0x0057 => 0x0077 // LATIN CAPITAL LETTER W case 0x0058 => 0x0078 // LATIN CAPITAL LETTER X case 0x0059 => 0x0079 // LATIN CAPITAL LETTER Y - case 0x005A => 0x007A // LATIN CAPITAL LETTER Z - case 0x00B5 => 0x03BC // MICRO SIGN - case 0x00C0 => 0x00E0 // LATIN CAPITAL LETTER A WITH GRAVE - case 0x00C1 => 0x00E1 // LATIN CAPITAL LETTER A WITH ACUTE - case 0x00C2 => 0x00E2 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX - case 0x00C3 => 0x00E3 // LATIN CAPITAL LETTER A WITH TILDE - case 0x00C4 => 0x00E4 // LATIN CAPITAL LETTER A WITH DIAERESIS - case 0x00C5 => 0x00E5 // LATIN CAPITAL LETTER A WITH RING ABOVE - case 0x00C6 => 0x00E6 // LATIN CAPITAL LETTER AE - case 0x00C7 => 0x00E7 // LATIN CAPITAL LETTER C WITH CEDILLA - case 0x00C8 => 0x00E8 // LATIN CAPITAL LETTER E WITH GRAVE - case 0x00C9 => 0x00E9 // LATIN CAPITAL LETTER E WITH ACUTE - case 0x00CA => 0x00EA // LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 
case 0x00CB => 0x00EB // LATIN CAPITAL LETTER E WITH DIAERESIS - case 0x00CC => 0x00EC // LATIN CAPITAL LETTER I WITH GRAVE - case 0x00CD => 0x00ED // LATIN CAPITAL LETTER I WITH ACUTE - case 0x00CE => 0x00EE // LATIN CAPITAL LETTER I WITH CIRCUMFLEX - case 0x00CF => 0x00EF // LATIN CAPITAL LETTER I WITH DIAERESIS - case 0x00D0 => 0x00F0 // LATIN CAPITAL LETTER ETH - case 0x00D1 => 0x00F1 // LATIN CAPITAL LETTER N WITH TILDE - case 0x00D2 => 0x00F2 // LATIN CAPITAL LETTER O WITH GRAVE - case 0x00D3 => 0x00F3 // LATIN CAPITAL LETTER O WITH ACUTE - case 0x00D4 => 0x00F4 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX - case 0x00D5 => 0x00F5 // LATIN CAPITAL LETTER O WITH TILDE - case 0x00D6 => 0x00F6 // LATIN CAPITAL LETTER O WITH DIAERESIS - case 0x00D8 => 0x00F8 // LATIN CAPITAL LETTER O WITH STROKE - case 0x00D9 => 0x00F9 // LATIN CAPITAL LETTER U WITH GRAVE - case 0x00DA => 0x00FA // LATIN CAPITAL LETTER U WITH ACUTE - case 0x00DB => 0x00FB // LATIN CAPITAL LETTER U WITH CIRCUMFLEX - case 0x00DC => 0x00FC // LATIN CAPITAL LETTER U WITH DIAERESIS - case 0x00DD => 0x00FD // LATIN CAPITAL LETTER Y WITH ACUTE - case 0x00DE => 0x00FE // LATIN CAPITAL LETTER THORN + case 0x005a => 0x007a // LATIN CAPITAL LETTER Z + case 0x00b5 => 0x03bc // MICRO SIGN + case 0x00c0 => 0x00e0 // LATIN CAPITAL LETTER A WITH GRAVE + case 0x00c1 => 0x00e1 // LATIN CAPITAL LETTER A WITH ACUTE + case 0x00c2 => 0x00e2 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + case 0x00c3 => 0x00e3 // LATIN CAPITAL LETTER A WITH TILDE + case 0x00c4 => 0x00e4 // LATIN CAPITAL LETTER A WITH DIAERESIS + case 0x00c5 => 0x00e5 // LATIN CAPITAL LETTER A WITH RING ABOVE + case 0x00c6 => 0x00e6 // LATIN CAPITAL LETTER AE + case 0x00c7 => 0x00e7 // LATIN CAPITAL LETTER C WITH CEDILLA + case 0x00c8 => 0x00e8 // LATIN CAPITAL LETTER E WITH GRAVE + case 0x00c9 => 0x00e9 // LATIN CAPITAL LETTER E WITH ACUTE + case 0x00ca => 0x00ea // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + case 0x00cb => 0x00eb // LATIN CAPITAL LETTER E WITH 
DIAERESIS + case 0x00cc => 0x00ec // LATIN CAPITAL LETTER I WITH GRAVE + case 0x00cd => 0x00ed // LATIN CAPITAL LETTER I WITH ACUTE + case 0x00ce => 0x00ee // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + case 0x00cf => 0x00ef // LATIN CAPITAL LETTER I WITH DIAERESIS + case 0x00d0 => 0x00f0 // LATIN CAPITAL LETTER ETH + case 0x00d1 => 0x00f1 // LATIN CAPITAL LETTER N WITH TILDE + case 0x00d2 => 0x00f2 // LATIN CAPITAL LETTER O WITH GRAVE + case 0x00d3 => 0x00f3 // LATIN CAPITAL LETTER O WITH ACUTE + case 0x00d4 => 0x00f4 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + case 0x00d5 => 0x00f5 // LATIN CAPITAL LETTER O WITH TILDE + case 0x00d6 => 0x00f6 // LATIN CAPITAL LETTER O WITH DIAERESIS + case 0x00d8 => 0x00f8 // LATIN CAPITAL LETTER O WITH STROKE + case 0x00d9 => 0x00f9 // LATIN CAPITAL LETTER U WITH GRAVE + case 0x00da => 0x00fa // LATIN CAPITAL LETTER U WITH ACUTE + case 0x00db => 0x00fb // LATIN CAPITAL LETTER U WITH CIRCUMFLEX + case 0x00dc => 0x00fc // LATIN CAPITAL LETTER U WITH DIAERESIS + case 0x00dd => 0x00fd // LATIN CAPITAL LETTER Y WITH ACUTE + case 0x00de => 0x00fe // LATIN CAPITAL LETTER THORN case 0x0100 => 0x0101 // LATIN CAPITAL LETTER A WITH MACRON case 0x0102 => 0x0103 // LATIN CAPITAL LETTER A WITH BREVE case 0x0104 => 0x0105 // LATIN CAPITAL LETTER A WITH OGONEK case 0x0106 => 0x0107 // LATIN CAPITAL LETTER C WITH ACUTE case 0x0108 => 0x0109 // LATIN CAPITAL LETTER C WITH CIRCUMFLEX - case 0x010A => 0x010B // LATIN CAPITAL LETTER C WITH DOT ABOVE - case 0x010C => 0x010D // LATIN CAPITAL LETTER C WITH CARON - case 0x010E => 0x010F // LATIN CAPITAL LETTER D WITH CARON + case 0x010a => 0x010b // LATIN CAPITAL LETTER C WITH DOT ABOVE + case 0x010c => 0x010d // LATIN CAPITAL LETTER C WITH CARON + case 0x010e => 0x010f // LATIN CAPITAL LETTER D WITH CARON case 0x0110 => 0x0111 // LATIN CAPITAL LETTER D WITH STROKE case 0x0112 => 0x0113 // LATIN CAPITAL LETTER E WITH MACRON case 0x0114 => 0x0115 // LATIN CAPITAL LETTER E WITH BREVE case 0x0116 => 0x0117 // 
LATIN CAPITAL LETTER E WITH DOT ABOVE case 0x0118 => 0x0119 // LATIN CAPITAL LETTER E WITH OGONEK - case 0x011A => 0x011B // LATIN CAPITAL LETTER E WITH CARON - case 0x011C => 0x011D // LATIN CAPITAL LETTER G WITH CIRCUMFLEX - case 0x011E => 0x011F // LATIN CAPITAL LETTER G WITH BREVE + case 0x011a => 0x011b // LATIN CAPITAL LETTER E WITH CARON + case 0x011c => 0x011d // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + case 0x011e => 0x011f // LATIN CAPITAL LETTER G WITH BREVE case 0x0120 => 0x0121 // LATIN CAPITAL LETTER G WITH DOT ABOVE case 0x0122 => 0x0123 // LATIN CAPITAL LETTER G WITH CEDILLA case 0x0124 => 0x0125 // LATIN CAPITAL LETTER H WITH CIRCUMFLEX case 0x0126 => 0x0127 // LATIN CAPITAL LETTER H WITH STROKE case 0x0128 => 0x0129 // LATIN CAPITAL LETTER I WITH TILDE - case 0x012A => 0x012B // LATIN CAPITAL LETTER I WITH MACRON - case 0x012C => 0x012D // LATIN CAPITAL LETTER I WITH BREVE - case 0x012E => 0x012F // LATIN CAPITAL LETTER I WITH OGONEK + case 0x012a => 0x012b // LATIN CAPITAL LETTER I WITH MACRON + case 0x012c => 0x012d // LATIN CAPITAL LETTER I WITH BREVE + case 0x012e => 0x012f // LATIN CAPITAL LETTER I WITH OGONEK case 0x0132 => 0x0133 // LATIN CAPITAL LIGATURE IJ case 0x0134 => 0x0135 // LATIN CAPITAL LETTER J WITH CIRCUMFLEX case 0x0136 => 0x0137 // LATIN CAPITAL LETTER K WITH CEDILLA - case 0x0139 => 0x013A // LATIN CAPITAL LETTER L WITH ACUTE - case 0x013B => 0x013C // LATIN CAPITAL LETTER L WITH CEDILLA - case 0x013D => 0x013E // LATIN CAPITAL LETTER L WITH CARON - case 0x013F => 0x0140 // LATIN CAPITAL LETTER L WITH MIDDLE DOT + case 0x0139 => 0x013a // LATIN CAPITAL LETTER L WITH ACUTE + case 0x013b => 0x013c // LATIN CAPITAL LETTER L WITH CEDILLA + case 0x013d => 0x013e // LATIN CAPITAL LETTER L WITH CARON + case 0x013f => 0x0140 // LATIN CAPITAL LETTER L WITH MIDDLE DOT case 0x0141 => 0x0142 // LATIN CAPITAL LETTER L WITH STROKE case 0x0143 => 0x0144 // LATIN CAPITAL LETTER N WITH ACUTE case 0x0145 => 0x0146 // LATIN CAPITAL LETTER N 
WITH CEDILLA case 0x0147 => 0x0148 // LATIN CAPITAL LETTER N WITH CARON - case 0x014A => 0x014B // LATIN CAPITAL LETTER ENG - case 0x014C => 0x014D // LATIN CAPITAL LETTER O WITH MACRON - case 0x014E => 0x014F // LATIN CAPITAL LETTER O WITH BREVE + case 0x014a => 0x014b // LATIN CAPITAL LETTER ENG + case 0x014c => 0x014d // LATIN CAPITAL LETTER O WITH MACRON + case 0x014e => 0x014f // LATIN CAPITAL LETTER O WITH BREVE case 0x0150 => 0x0151 // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE case 0x0152 => 0x0153 // LATIN CAPITAL LIGATURE OE case 0x0154 => 0x0155 // LATIN CAPITAL LETTER R WITH ACUTE case 0x0156 => 0x0157 // LATIN CAPITAL LETTER R WITH CEDILLA case 0x0158 => 0x0159 // LATIN CAPITAL LETTER R WITH CARON - case 0x015A => 0x015B // LATIN CAPITAL LETTER S WITH ACUTE - case 0x015C => 0x015D // LATIN CAPITAL LETTER S WITH CIRCUMFLEX - case 0x015E => 0x015F // LATIN CAPITAL LETTER S WITH CEDILLA + case 0x015a => 0x015b // LATIN CAPITAL LETTER S WITH ACUTE + case 0x015c => 0x015d // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + case 0x015e => 0x015f // LATIN CAPITAL LETTER S WITH CEDILLA case 0x0160 => 0x0161 // LATIN CAPITAL LETTER S WITH CARON case 0x0162 => 0x0163 // LATIN CAPITAL LETTER T WITH CEDILLA case 0x0164 => 0x0165 // LATIN CAPITAL LETTER T WITH CARON case 0x0166 => 0x0167 // LATIN CAPITAL LETTER T WITH STROKE case 0x0168 => 0x0169 // LATIN CAPITAL LETTER U WITH TILDE - case 0x016A => 0x016B // LATIN CAPITAL LETTER U WITH MACRON - case 0x016C => 0x016D // LATIN CAPITAL LETTER U WITH BREVE - case 0x016E => 0x016F // LATIN CAPITAL LETTER U WITH RING ABOVE + case 0x016a => 0x016b // LATIN CAPITAL LETTER U WITH MACRON + case 0x016c => 0x016d // LATIN CAPITAL LETTER U WITH BREVE + case 0x016e => 0x016f // LATIN CAPITAL LETTER U WITH RING ABOVE case 0x0170 => 0x0171 // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE case 0x0172 => 0x0173 // LATIN CAPITAL LETTER U WITH OGONEK case 0x0174 => 0x0175 // LATIN CAPITAL LETTER W WITH CIRCUMFLEX case 0x0176 => 0x0177 // LATIN 
CAPITAL LETTER Y WITH CIRCUMFLEX - case 0x0178 => 0x00FF // LATIN CAPITAL LETTER Y WITH DIAERESIS - case 0x0179 => 0x017A // LATIN CAPITAL LETTER Z WITH ACUTE - case 0x017B => 0x017C // LATIN CAPITAL LETTER Z WITH DOT ABOVE - case 0x017D => 0x017E // LATIN CAPITAL LETTER Z WITH CARON - case 0x017F => 0x0073 // LATIN SMALL LETTER LONG S + case 0x0178 => 0x00ff // LATIN CAPITAL LETTER Y WITH DIAERESIS + case 0x0179 => 0x017a // LATIN CAPITAL LETTER Z WITH ACUTE + case 0x017b => 0x017c // LATIN CAPITAL LETTER Z WITH DOT ABOVE + case 0x017d => 0x017e // LATIN CAPITAL LETTER Z WITH CARON + case 0x017f => 0x0073 // LATIN SMALL LETTER LONG S case 0x0181 => 0x0253 // LATIN CAPITAL LETTER B WITH HOOK case 0x0182 => 0x0183 // LATIN CAPITAL LETTER B WITH TOPBAR case 0x0184 => 0x0185 // LATIN CAPITAL LETTER TONE SIX case 0x0186 => 0x0254 // LATIN CAPITAL LETTER OPEN O case 0x0187 => 0x0188 // LATIN CAPITAL LETTER C WITH HOOK case 0x0189 => 0x0256 // LATIN CAPITAL LETTER AFRICAN D - case 0x018A => 0x0257 // LATIN CAPITAL LETTER D WITH HOOK - case 0x018B => 0x018C // LATIN CAPITAL LETTER D WITH TOPBAR - case 0x018E => 0x01DD // LATIN CAPITAL LETTER REVERSED E - case 0x018F => 0x0259 // LATIN CAPITAL LETTER SCHWA - case 0x0190 => 0x025B // LATIN CAPITAL LETTER OPEN E + case 0x018a => 0x0257 // LATIN CAPITAL LETTER D WITH HOOK + case 0x018b => 0x018c // LATIN CAPITAL LETTER D WITH TOPBAR + case 0x018e => 0x01dd // LATIN CAPITAL LETTER REVERSED E + case 0x018f => 0x0259 // LATIN CAPITAL LETTER SCHWA + case 0x0190 => 0x025b // LATIN CAPITAL LETTER OPEN E case 0x0191 => 0x0192 // LATIN CAPITAL LETTER F WITH HOOK case 0x0193 => 0x0260 // LATIN CAPITAL LETTER G WITH HOOK case 0x0194 => 0x0263 // LATIN CAPITAL LETTER GAMMA case 0x0196 => 0x0269 // LATIN CAPITAL LETTER IOTA case 0x0197 => 0x0268 // LATIN CAPITAL LETTER I WITH STROKE case 0x0198 => 0x0199 // LATIN CAPITAL LETTER K WITH HOOK - case 0x019C => 0x026F // LATIN CAPITAL LETTER TURNED M - case 0x019D => 0x0272 // LATIN CAPITAL 
LETTER N WITH LEFT HOOK - case 0x019F => 0x0275 // LATIN CAPITAL LETTER O WITH MIDDLE TILDE - case 0x01A0 => 0x01A1 // LATIN CAPITAL LETTER O WITH HORN - case 0x01A2 => 0x01A3 // LATIN CAPITAL LETTER OI - case 0x01A4 => 0x01A5 // LATIN CAPITAL LETTER P WITH HOOK - case 0x01A6 => 0x0280 // LATIN LETTER YR - case 0x01A7 => 0x01A8 // LATIN CAPITAL LETTER TONE TWO - case 0x01A9 => 0x0283 // LATIN CAPITAL LETTER ESH - case 0x01AC => 0x01AD // LATIN CAPITAL LETTER T WITH HOOK - case 0x01AE => 0x0288 // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK - case 0x01AF => 0x01B0 // LATIN CAPITAL LETTER U WITH HORN - case 0x01B1 => 0x028A // LATIN CAPITAL LETTER UPSILON - case 0x01B2 => 0x028B // LATIN CAPITAL LETTER V WITH HOOK - case 0x01B3 => 0x01B4 // LATIN CAPITAL LETTER Y WITH HOOK - case 0x01B5 => 0x01B6 // LATIN CAPITAL LETTER Z WITH STROKE - case 0x01B7 => 0x0292 // LATIN CAPITAL LETTER EZH - case 0x01B8 => 0x01B9 // LATIN CAPITAL LETTER EZH REVERSED - case 0x01BC => 0x01BD // LATIN CAPITAL LETTER TONE FIVE - case 0x01C4 => 0x01C6 // LATIN CAPITAL LETTER DZ WITH CARON - case 0x01C5 => 0x01C6 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON - case 0x01C7 => 0x01C9 // LATIN CAPITAL LETTER LJ - case 0x01C8 => 0x01C9 // LATIN CAPITAL LETTER L WITH SMALL LETTER J - case 0x01CA => 0x01CC // LATIN CAPITAL LETTER NJ - case 0x01CB => 0x01CC // LATIN CAPITAL LETTER N WITH SMALL LETTER J - case 0x01CD => 0x01CE // LATIN CAPITAL LETTER A WITH CARON - case 0x01CF => 0x01D0 // LATIN CAPITAL LETTER I WITH CARON - case 0x01D1 => 0x01D2 // LATIN CAPITAL LETTER O WITH CARON - case 0x01D3 => 0x01D4 // LATIN CAPITAL LETTER U WITH CARON - case 0x01D5 => 0x01D6 // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON - case 0x01D7 => 0x01D8 // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE - case 0x01D9 => 0x01DA // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON - case 0x01DB => 0x01DC // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE - case 0x01DE => 0x01DF // LATIN CAPITAL LETTER A WITH 
DIAERESIS AND MACRON - case 0x01E0 => 0x01E1 // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON - case 0x01E2 => 0x01E3 // LATIN CAPITAL LETTER AE WITH MACRON - case 0x01E4 => 0x01E5 // LATIN CAPITAL LETTER G WITH STROKE - case 0x01E6 => 0x01E7 // LATIN CAPITAL LETTER G WITH CARON - case 0x01E8 => 0x01E9 // LATIN CAPITAL LETTER K WITH CARON - case 0x01EA => 0x01EB // LATIN CAPITAL LETTER O WITH OGONEK - case 0x01EC => 0x01ED // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON - case 0x01EE => 0x01EF // LATIN CAPITAL LETTER EZH WITH CARON - case 0x01F1 => 0x01F3 // LATIN CAPITAL LETTER DZ - case 0x01F2 => 0x01F3 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z - case 0x01F4 => 0x01F5 // LATIN CAPITAL LETTER G WITH ACUTE - case 0x01F6 => 0x0195 // LATIN CAPITAL LETTER HWAIR - case 0x01F7 => 0x01BF // LATIN CAPITAL LETTER WYNN - case 0x01F8 => 0x01F9 // LATIN CAPITAL LETTER N WITH GRAVE - case 0x01FA => 0x01FB // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE - case 0x01FC => 0x01FD // LATIN CAPITAL LETTER AE WITH ACUTE - case 0x01FE => 0x01FF // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + case 0x019c => 0x026f // LATIN CAPITAL LETTER TURNED M + case 0x019d => 0x0272 // LATIN CAPITAL LETTER N WITH LEFT HOOK + case 0x019f => 0x0275 // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + case 0x01a0 => 0x01a1 // LATIN CAPITAL LETTER O WITH HORN + case 0x01a2 => 0x01a3 // LATIN CAPITAL LETTER OI + case 0x01a4 => 0x01a5 // LATIN CAPITAL LETTER P WITH HOOK + case 0x01a6 => 0x0280 // LATIN LETTER YR + case 0x01a7 => 0x01a8 // LATIN CAPITAL LETTER TONE TWO + case 0x01a9 => 0x0283 // LATIN CAPITAL LETTER ESH + case 0x01ac => 0x01ad // LATIN CAPITAL LETTER T WITH HOOK + case 0x01ae => 0x0288 // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + case 0x01af => 0x01b0 // LATIN CAPITAL LETTER U WITH HORN + case 0x01b1 => 0x028a // LATIN CAPITAL LETTER UPSILON + case 0x01b2 => 0x028b // LATIN CAPITAL LETTER V WITH HOOK + case 0x01b3 => 0x01b4 // LATIN CAPITAL LETTER Y WITH HOOK + case 0x01b5 => 
0x01b6 // LATIN CAPITAL LETTER Z WITH STROKE + case 0x01b7 => 0x0292 // LATIN CAPITAL LETTER EZH + case 0x01b8 => 0x01b9 // LATIN CAPITAL LETTER EZH REVERSED + case 0x01bc => 0x01bd // LATIN CAPITAL LETTER TONE FIVE + case 0x01c4 => 0x01c6 // LATIN CAPITAL LETTER DZ WITH CARON + case 0x01c5 => 0x01c6 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + case 0x01c7 => 0x01c9 // LATIN CAPITAL LETTER LJ + case 0x01c8 => 0x01c9 // LATIN CAPITAL LETTER L WITH SMALL LETTER J + case 0x01ca => 0x01cc // LATIN CAPITAL LETTER NJ + case 0x01cb => 0x01cc // LATIN CAPITAL LETTER N WITH SMALL LETTER J + case 0x01cd => 0x01ce // LATIN CAPITAL LETTER A WITH CARON + case 0x01cf => 0x01d0 // LATIN CAPITAL LETTER I WITH CARON + case 0x01d1 => 0x01d2 // LATIN CAPITAL LETTER O WITH CARON + case 0x01d3 => 0x01d4 // LATIN CAPITAL LETTER U WITH CARON + case 0x01d5 => 0x01d6 // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + case 0x01d7 => 0x01d8 // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + case 0x01d9 => 0x01da // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + case 0x01db => 0x01dc // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + case 0x01de => 0x01df // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + case 0x01e0 => 0x01e1 // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + case 0x01e2 => 0x01e3 // LATIN CAPITAL LETTER AE WITH MACRON + case 0x01e4 => 0x01e5 // LATIN CAPITAL LETTER G WITH STROKE + case 0x01e6 => 0x01e7 // LATIN CAPITAL LETTER G WITH CARON + case 0x01e8 => 0x01e9 // LATIN CAPITAL LETTER K WITH CARON + case 0x01ea => 0x01eb // LATIN CAPITAL LETTER O WITH OGONEK + case 0x01ec => 0x01ed // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + case 0x01ee => 0x01ef // LATIN CAPITAL LETTER EZH WITH CARON + case 0x01f1 => 0x01f3 // LATIN CAPITAL LETTER DZ + case 0x01f2 => 0x01f3 // LATIN CAPITAL LETTER D WITH SMALL LETTER Z + case 0x01f4 => 0x01f5 // LATIN CAPITAL LETTER G WITH ACUTE + case 0x01f6 => 0x0195 // LATIN CAPITAL LETTER HWAIR + case 0x01f7 => 
0x01bf // LATIN CAPITAL LETTER WYNN + case 0x01f8 => 0x01f9 // LATIN CAPITAL LETTER N WITH GRAVE + case 0x01fa => 0x01fb // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + case 0x01fc => 0x01fd // LATIN CAPITAL LETTER AE WITH ACUTE + case 0x01fe => 0x01ff // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE case 0x0200 => 0x0201 // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE case 0x0202 => 0x0203 // LATIN CAPITAL LETTER A WITH INVERTED BREVE case 0x0204 => 0x0205 // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE case 0x0206 => 0x0207 // LATIN CAPITAL LETTER E WITH INVERTED BREVE case 0x0208 => 0x0209 // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE - case 0x020A => 0x020B // LATIN CAPITAL LETTER I WITH INVERTED BREVE - case 0x020C => 0x020D // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE - case 0x020E => 0x020F // LATIN CAPITAL LETTER O WITH INVERTED BREVE + case 0x020a => 0x020b // LATIN CAPITAL LETTER I WITH INVERTED BREVE + case 0x020c => 0x020d // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + case 0x020e => 0x020f // LATIN CAPITAL LETTER O WITH INVERTED BREVE case 0x0210 => 0x0211 // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE case 0x0212 => 0x0213 // LATIN CAPITAL LETTER R WITH INVERTED BREVE case 0x0214 => 0x0215 // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE case 0x0216 => 0x0217 // LATIN CAPITAL LETTER U WITH INVERTED BREVE case 0x0218 => 0x0219 // LATIN CAPITAL LETTER S WITH COMMA BELOW - case 0x021A => 0x021B // LATIN CAPITAL LETTER T WITH COMMA BELOW - case 0x021C => 0x021D // LATIN CAPITAL LETTER YOGH - case 0x021E => 0x021F // LATIN CAPITAL LETTER H WITH CARON - case 0x0220 => 0x019E // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + case 0x021a => 0x021b // LATIN CAPITAL LETTER T WITH COMMA BELOW + case 0x021c => 0x021d // LATIN CAPITAL LETTER YOGH + case 0x021e => 0x021f // LATIN CAPITAL LETTER H WITH CARON + case 0x0220 => 0x019e // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG case 0x0222 => 0x0223 // LATIN CAPITAL LETTER OU case 0x0224 => 0x0225 // LATIN CAPITAL LETTER Z WITH HOOK 
case 0x0226 => 0x0227 // LATIN CAPITAL LETTER A WITH DOT ABOVE case 0x0228 => 0x0229 // LATIN CAPITAL LETTER E WITH CEDILLA - case 0x022A => 0x022B // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON - case 0x022C => 0x022D // LATIN CAPITAL LETTER O WITH TILDE AND MACRON - case 0x022E => 0x022F // LATIN CAPITAL LETTER O WITH DOT ABOVE + case 0x022a => 0x022b // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + case 0x022c => 0x022d // LATIN CAPITAL LETTER O WITH TILDE AND MACRON + case 0x022e => 0x022f // LATIN CAPITAL LETTER O WITH DOT ABOVE case 0x0230 => 0x0231 // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON case 0x0232 => 0x0233 // LATIN CAPITAL LETTER Y WITH MACRON - case 0x023A => 0x2C65 // LATIN CAPITAL LETTER A WITH STROKE - case 0x023B => 0x023C // LATIN CAPITAL LETTER C WITH STROKE - case 0x023D => 0x019A // LATIN CAPITAL LETTER L WITH BAR - case 0x023E => 0x2C66 // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + case 0x023a => 0x2c65 // LATIN CAPITAL LETTER A WITH STROKE + case 0x023b => 0x023c // LATIN CAPITAL LETTER C WITH STROKE + case 0x023d => 0x019a // LATIN CAPITAL LETTER L WITH BAR + case 0x023e => 0x2c66 // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE case 0x0241 => 0x0242 // LATIN CAPITAL LETTER GLOTTAL STOP case 0x0243 => 0x0180 // LATIN CAPITAL LETTER B WITH STROKE case 0x0244 => 0x0289 // LATIN CAPITAL LETTER U BAR - case 0x0245 => 0x028C // LATIN CAPITAL LETTER TURNED V + case 0x0245 => 0x028c // LATIN CAPITAL LETTER TURNED V case 0x0246 => 0x0247 // LATIN CAPITAL LETTER E WITH STROKE case 0x0248 => 0x0249 // LATIN CAPITAL LETTER J WITH STROKE - case 0x024A => 0x024B // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL - case 0x024C => 0x024D // LATIN CAPITAL LETTER R WITH STROKE - case 0x024E => 0x024F // LATIN CAPITAL LETTER Y WITH STROKE - case 0x0345 => 0x03B9 // COMBINING GREEK YPOGEGRAMMENI + case 0x024a => 0x024b // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + case 0x024c => 0x024d // LATIN CAPITAL LETTER R WITH STROKE + case 0x024e => 
0x024f // LATIN CAPITAL LETTER Y WITH STROKE + case 0x0345 => 0x03b9 // COMBINING GREEK YPOGEGRAMMENI case 0x0370 => 0x0371 // GREEK CAPITAL LETTER HETA case 0x0372 => 0x0373 // GREEK CAPITAL LETTER ARCHAIC SAMPI case 0x0376 => 0x0377 // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA - case 0x037F => 0x03F3 // GREEK CAPITAL LETTER YOT - case 0x0386 => 0x03AC // GREEK CAPITAL LETTER ALPHA WITH TONOS - case 0x0388 => 0x03AD // GREEK CAPITAL LETTER EPSILON WITH TONOS - case 0x0389 => 0x03AE // GREEK CAPITAL LETTER ETA WITH TONOS - case 0x038A => 0x03AF // GREEK CAPITAL LETTER IOTA WITH TONOS - case 0x038C => 0x03CC // GREEK CAPITAL LETTER OMICRON WITH TONOS - case 0x038E => 0x03CD // GREEK CAPITAL LETTER UPSILON WITH TONOS - case 0x038F => 0x03CE // GREEK CAPITAL LETTER OMEGA WITH TONOS - case 0x0391 => 0x03B1 // GREEK CAPITAL LETTER ALPHA - case 0x0392 => 0x03B2 // GREEK CAPITAL LETTER BETA - case 0x0393 => 0x03B3 // GREEK CAPITAL LETTER GAMMA - case 0x0394 => 0x03B4 // GREEK CAPITAL LETTER DELTA - case 0x0395 => 0x03B5 // GREEK CAPITAL LETTER EPSILON - case 0x0396 => 0x03B6 // GREEK CAPITAL LETTER ZETA - case 0x0397 => 0x03B7 // GREEK CAPITAL LETTER ETA - case 0x0398 => 0x03B8 // GREEK CAPITAL LETTER THETA - case 0x0399 => 0x03B9 // GREEK CAPITAL LETTER IOTA - case 0x039A => 0x03BA // GREEK CAPITAL LETTER KAPPA - case 0x039B => 0x03BB // GREEK CAPITAL LETTER LAMDA - case 0x039C => 0x03BC // GREEK CAPITAL LETTER MU - case 0x039D => 0x03BD // GREEK CAPITAL LETTER NU - case 0x039E => 0x03BE // GREEK CAPITAL LETTER XI - case 0x039F => 0x03BF // GREEK CAPITAL LETTER OMICRON - case 0x03A0 => 0x03C0 // GREEK CAPITAL LETTER PI - case 0x03A1 => 0x03C1 // GREEK CAPITAL LETTER RHO - case 0x03A3 => 0x03C3 // GREEK CAPITAL LETTER SIGMA - case 0x03A4 => 0x03C4 // GREEK CAPITAL LETTER TAU - case 0x03A5 => 0x03C5 // GREEK CAPITAL LETTER UPSILON - case 0x03A6 => 0x03C6 // GREEK CAPITAL LETTER PHI - case 0x03A7 => 0x03C7 // GREEK CAPITAL LETTER CHI - case 0x03A8 => 0x03C8 // GREEK CAPITAL 
LETTER PSI - case 0x03A9 => 0x03C9 // GREEK CAPITAL LETTER OMEGA - case 0x03AA => 0x03CA // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - case 0x03AB => 0x03CB // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - case 0x03C2 => 0x03C3 // GREEK SMALL LETTER FINAL SIGMA - case 0x03CF => 0x03D7 // GREEK CAPITAL KAI SYMBOL - case 0x03D0 => 0x03B2 // GREEK BETA SYMBOL - case 0x03D1 => 0x03B8 // GREEK THETA SYMBOL - case 0x03D5 => 0x03C6 // GREEK PHI SYMBOL - case 0x03D6 => 0x03C0 // GREEK PI SYMBOL - case 0x03D8 => 0x03D9 // GREEK LETTER ARCHAIC KOPPA - case 0x03DA => 0x03DB // GREEK LETTER STIGMA - case 0x03DC => 0x03DD // GREEK LETTER DIGAMMA - case 0x03DE => 0x03DF // GREEK LETTER KOPPA - case 0x03E0 => 0x03E1 // GREEK LETTER SAMPI - case 0x03E2 => 0x03E3 // COPTIC CAPITAL LETTER SHEI - case 0x03E4 => 0x03E5 // COPTIC CAPITAL LETTER FEI - case 0x03E6 => 0x03E7 // COPTIC CAPITAL LETTER KHEI - case 0x03E8 => 0x03E9 // COPTIC CAPITAL LETTER HORI - case 0x03EA => 0x03EB // COPTIC CAPITAL LETTER GANGIA - case 0x03EC => 0x03ED // COPTIC CAPITAL LETTER SHIMA - case 0x03EE => 0x03EF // COPTIC CAPITAL LETTER DEI - case 0x03F0 => 0x03BA // GREEK KAPPA SYMBOL - case 0x03F1 => 0x03C1 // GREEK RHO SYMBOL - case 0x03F4 => 0x03B8 // GREEK CAPITAL THETA SYMBOL - case 0x03F5 => 0x03B5 // GREEK LUNATE EPSILON SYMBOL - case 0x03F7 => 0x03F8 // GREEK CAPITAL LETTER SHO - case 0x03F9 => 0x03F2 // GREEK CAPITAL LUNATE SIGMA SYMBOL - case 0x03FA => 0x03FB // GREEK CAPITAL LETTER SAN - case 0x03FD => 0x037B // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL - case 0x03FE => 0x037C // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL - case 0x03FF => 0x037D // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + case 0x037f => 0x03f3 // GREEK CAPITAL LETTER YOT + case 0x0386 => 0x03ac // GREEK CAPITAL LETTER ALPHA WITH TONOS + case 0x0388 => 0x03ad // GREEK CAPITAL LETTER EPSILON WITH TONOS + case 0x0389 => 0x03ae // GREEK CAPITAL LETTER ETA WITH TONOS + case 0x038a => 0x03af // GREEK CAPITAL LETTER IOTA WITH 
TONOS + case 0x038c => 0x03cc // GREEK CAPITAL LETTER OMICRON WITH TONOS + case 0x038e => 0x03cd // GREEK CAPITAL LETTER UPSILON WITH TONOS + case 0x038f => 0x03ce // GREEK CAPITAL LETTER OMEGA WITH TONOS + case 0x0391 => 0x03b1 // GREEK CAPITAL LETTER ALPHA + case 0x0392 => 0x03b2 // GREEK CAPITAL LETTER BETA + case 0x0393 => 0x03b3 // GREEK CAPITAL LETTER GAMMA + case 0x0394 => 0x03b4 // GREEK CAPITAL LETTER DELTA + case 0x0395 => 0x03b5 // GREEK CAPITAL LETTER EPSILON + case 0x0396 => 0x03b6 // GREEK CAPITAL LETTER ZETA + case 0x0397 => 0x03b7 // GREEK CAPITAL LETTER ETA + case 0x0398 => 0x03b8 // GREEK CAPITAL LETTER THETA + case 0x0399 => 0x03b9 // GREEK CAPITAL LETTER IOTA + case 0x039a => 0x03ba // GREEK CAPITAL LETTER KAPPA + case 0x039b => 0x03bb // GREEK CAPITAL LETTER LAMDA + case 0x039c => 0x03bc // GREEK CAPITAL LETTER MU + case 0x039d => 0x03bd // GREEK CAPITAL LETTER NU + case 0x039e => 0x03be // GREEK CAPITAL LETTER XI + case 0x039f => 0x03bf // GREEK CAPITAL LETTER OMICRON + case 0x03a0 => 0x03c0 // GREEK CAPITAL LETTER PI + case 0x03a1 => 0x03c1 // GREEK CAPITAL LETTER RHO + case 0x03a3 => 0x03c3 // GREEK CAPITAL LETTER SIGMA + case 0x03a4 => 0x03c4 // GREEK CAPITAL LETTER TAU + case 0x03a5 => 0x03c5 // GREEK CAPITAL LETTER UPSILON + case 0x03a6 => 0x03c6 // GREEK CAPITAL LETTER PHI + case 0x03a7 => 0x03c7 // GREEK CAPITAL LETTER CHI + case 0x03a8 => 0x03c8 // GREEK CAPITAL LETTER PSI + case 0x03a9 => 0x03c9 // GREEK CAPITAL LETTER OMEGA + case 0x03aa => 0x03ca // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + case 0x03ab => 0x03cb // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + case 0x03c2 => 0x03c3 // GREEK SMALL LETTER FINAL SIGMA + case 0x03cf => 0x03d7 // GREEK CAPITAL KAI SYMBOL + case 0x03d0 => 0x03b2 // GREEK BETA SYMBOL + case 0x03d1 => 0x03b8 // GREEK THETA SYMBOL + case 0x03d5 => 0x03c6 // GREEK PHI SYMBOL + case 0x03d6 => 0x03c0 // GREEK PI SYMBOL + case 0x03d8 => 0x03d9 // GREEK LETTER ARCHAIC KOPPA + case 0x03da => 0x03db // GREEK 
LETTER STIGMA + case 0x03dc => 0x03dd // GREEK LETTER DIGAMMA + case 0x03de => 0x03df // GREEK LETTER KOPPA + case 0x03e0 => 0x03e1 // GREEK LETTER SAMPI + case 0x03e2 => 0x03e3 // COPTIC CAPITAL LETTER SHEI + case 0x03e4 => 0x03e5 // COPTIC CAPITAL LETTER FEI + case 0x03e6 => 0x03e7 // COPTIC CAPITAL LETTER KHEI + case 0x03e8 => 0x03e9 // COPTIC CAPITAL LETTER HORI + case 0x03ea => 0x03eb // COPTIC CAPITAL LETTER GANGIA + case 0x03ec => 0x03ed // COPTIC CAPITAL LETTER SHIMA + case 0x03ee => 0x03ef // COPTIC CAPITAL LETTER DEI + case 0x03f0 => 0x03ba // GREEK KAPPA SYMBOL + case 0x03f1 => 0x03c1 // GREEK RHO SYMBOL + case 0x03f4 => 0x03b8 // GREEK CAPITAL THETA SYMBOL + case 0x03f5 => 0x03b5 // GREEK LUNATE EPSILON SYMBOL + case 0x03f7 => 0x03f8 // GREEK CAPITAL LETTER SHO + case 0x03f9 => 0x03f2 // GREEK CAPITAL LUNATE SIGMA SYMBOL + case 0x03fa => 0x03fb // GREEK CAPITAL LETTER SAN + case 0x03fd => 0x037b // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + case 0x03fe => 0x037c // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + case 0x03ff => 0x037d // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL case 0x0400 => 0x0450 // CYRILLIC CAPITAL LETTER IE WITH GRAVE case 0x0401 => 0x0451 // CYRILLIC CAPITAL LETTER IO case 0x0402 => 0x0452 // CYRILLIC CAPITAL LETTER DJE @@ -517,12 +691,12 @@ private[ci] object CaseFolding { case 0x0407 => 0x0457 // CYRILLIC CAPITAL LETTER YI case 0x0408 => 0x0458 // CYRILLIC CAPITAL LETTER JE case 0x0409 => 0x0459 // CYRILLIC CAPITAL LETTER LJE - case 0x040A => 0x045A // CYRILLIC CAPITAL LETTER NJE - case 0x040B => 0x045B // CYRILLIC CAPITAL LETTER TSHE - case 0x040C => 0x045C // CYRILLIC CAPITAL LETTER KJE - case 0x040D => 0x045D // CYRILLIC CAPITAL LETTER I WITH GRAVE - case 0x040E => 0x045E // CYRILLIC CAPITAL LETTER SHORT U - case 0x040F => 0x045F // CYRILLIC CAPITAL LETTER DZHE + case 0x040a => 0x045a // CYRILLIC CAPITAL LETTER NJE + case 0x040b => 0x045b // CYRILLIC CAPITAL LETTER TSHE + case 0x040c => 0x045c // CYRILLIC CAPITAL LETTER 
KJE + case 0x040d => 0x045d // CYRILLIC CAPITAL LETTER I WITH GRAVE + case 0x040e => 0x045e // CYRILLIC CAPITAL LETTER SHORT U + case 0x040f => 0x045f // CYRILLIC CAPITAL LETTER DZHE case 0x0410 => 0x0430 // CYRILLIC CAPITAL LETTER A case 0x0411 => 0x0431 // CYRILLIC CAPITAL LETTER BE case 0x0412 => 0x0432 // CYRILLIC CAPITAL LETTER VE @@ -533,12 +707,12 @@ private[ci] object CaseFolding { case 0x0417 => 0x0437 // CYRILLIC CAPITAL LETTER ZE case 0x0418 => 0x0438 // CYRILLIC CAPITAL LETTER I case 0x0419 => 0x0439 // CYRILLIC CAPITAL LETTER SHORT I - case 0x041A => 0x043A // CYRILLIC CAPITAL LETTER KA - case 0x041B => 0x043B // CYRILLIC CAPITAL LETTER EL - case 0x041C => 0x043C // CYRILLIC CAPITAL LETTER EM - case 0x041D => 0x043D // CYRILLIC CAPITAL LETTER EN - case 0x041E => 0x043E // CYRILLIC CAPITAL LETTER O - case 0x041F => 0x043F // CYRILLIC CAPITAL LETTER PE + case 0x041a => 0x043a // CYRILLIC CAPITAL LETTER KA + case 0x041b => 0x043b // CYRILLIC CAPITAL LETTER EL + case 0x041c => 0x043c // CYRILLIC CAPITAL LETTER EM + case 0x041d => 0x043d // CYRILLIC CAPITAL LETTER EN + case 0x041e => 0x043e // CYRILLIC CAPITAL LETTER O + case 0x041f => 0x043f // CYRILLIC CAPITAL LETTER PE case 0x0420 => 0x0440 // CYRILLIC CAPITAL LETTER ER case 0x0421 => 0x0441 // CYRILLIC CAPITAL LETTER ES case 0x0422 => 0x0442 // CYRILLIC CAPITAL LETTER TE @@ -549,112 +723,112 @@ private[ci] object CaseFolding { case 0x0427 => 0x0447 // CYRILLIC CAPITAL LETTER CHE case 0x0428 => 0x0448 // CYRILLIC CAPITAL LETTER SHA case 0x0429 => 0x0449 // CYRILLIC CAPITAL LETTER SHCHA - case 0x042A => 0x044A // CYRILLIC CAPITAL LETTER HARD SIGN - case 0x042B => 0x044B // CYRILLIC CAPITAL LETTER YERU - case 0x042C => 0x044C // CYRILLIC CAPITAL LETTER SOFT SIGN - case 0x042D => 0x044D // CYRILLIC CAPITAL LETTER E - case 0x042E => 0x044E // CYRILLIC CAPITAL LETTER YU - case 0x042F => 0x044F // CYRILLIC CAPITAL LETTER YA + case 0x042a => 0x044a // CYRILLIC CAPITAL LETTER HARD SIGN + case 0x042b => 0x044b // 
CYRILLIC CAPITAL LETTER YERU + case 0x042c => 0x044c // CYRILLIC CAPITAL LETTER SOFT SIGN + case 0x042d => 0x044d // CYRILLIC CAPITAL LETTER E + case 0x042e => 0x044e // CYRILLIC CAPITAL LETTER YU + case 0x042f => 0x044f // CYRILLIC CAPITAL LETTER YA case 0x0460 => 0x0461 // CYRILLIC CAPITAL LETTER OMEGA case 0x0462 => 0x0463 // CYRILLIC CAPITAL LETTER YAT case 0x0464 => 0x0465 // CYRILLIC CAPITAL LETTER IOTIFIED E case 0x0466 => 0x0467 // CYRILLIC CAPITAL LETTER LITTLE YUS case 0x0468 => 0x0469 // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS - case 0x046A => 0x046B // CYRILLIC CAPITAL LETTER BIG YUS - case 0x046C => 0x046D // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS - case 0x046E => 0x046F // CYRILLIC CAPITAL LETTER KSI + case 0x046a => 0x046b // CYRILLIC CAPITAL LETTER BIG YUS + case 0x046c => 0x046d // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + case 0x046e => 0x046f // CYRILLIC CAPITAL LETTER KSI case 0x0470 => 0x0471 // CYRILLIC CAPITAL LETTER PSI case 0x0472 => 0x0473 // CYRILLIC CAPITAL LETTER FITA case 0x0474 => 0x0475 // CYRILLIC CAPITAL LETTER IZHITSA case 0x0476 => 0x0477 // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT case 0x0478 => 0x0479 // CYRILLIC CAPITAL LETTER UK - case 0x047A => 0x047B // CYRILLIC CAPITAL LETTER ROUND OMEGA - case 0x047C => 0x047D // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO - case 0x047E => 0x047F // CYRILLIC CAPITAL LETTER OT + case 0x047a => 0x047b // CYRILLIC CAPITAL LETTER ROUND OMEGA + case 0x047c => 0x047d // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO + case 0x047e => 0x047f // CYRILLIC CAPITAL LETTER OT case 0x0480 => 0x0481 // CYRILLIC CAPITAL LETTER KOPPA - case 0x048A => 0x048B // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL - case 0x048C => 0x048D // CYRILLIC CAPITAL LETTER SEMISOFT SIGN - case 0x048E => 0x048F // CYRILLIC CAPITAL LETTER ER WITH TICK + case 0x048a => 0x048b // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + case 0x048c => 0x048d // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + case 0x048e => 0x048f // 
CYRILLIC CAPITAL LETTER ER WITH TICK case 0x0490 => 0x0491 // CYRILLIC CAPITAL LETTER GHE WITH UPTURN case 0x0492 => 0x0493 // CYRILLIC CAPITAL LETTER GHE WITH STROKE case 0x0494 => 0x0495 // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK case 0x0496 => 0x0497 // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER case 0x0498 => 0x0499 // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER - case 0x049A => 0x049B // CYRILLIC CAPITAL LETTER KA WITH DESCENDER - case 0x049C => 0x049D // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE - case 0x049E => 0x049F // CYRILLIC CAPITAL LETTER KA WITH STROKE - case 0x04A0 => 0x04A1 // CYRILLIC CAPITAL LETTER BASHKIR KA - case 0x04A2 => 0x04A3 // CYRILLIC CAPITAL LETTER EN WITH DESCENDER - case 0x04A4 => 0x04A5 // CYRILLIC CAPITAL LIGATURE EN GHE - case 0x04A6 => 0x04A7 // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK - case 0x04A8 => 0x04A9 // CYRILLIC CAPITAL LETTER ABKHASIAN HA - case 0x04AA => 0x04AB // CYRILLIC CAPITAL LETTER ES WITH DESCENDER - case 0x04AC => 0x04AD // CYRILLIC CAPITAL LETTER TE WITH DESCENDER - case 0x04AE => 0x04AF // CYRILLIC CAPITAL LETTER STRAIGHT U - case 0x04B0 => 0x04B1 // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE - case 0x04B2 => 0x04B3 // CYRILLIC CAPITAL LETTER HA WITH DESCENDER - case 0x04B4 => 0x04B5 // CYRILLIC CAPITAL LIGATURE TE TSE - case 0x04B6 => 0x04B7 // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER - case 0x04B8 => 0x04B9 // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE - case 0x04BA => 0x04BB // CYRILLIC CAPITAL LETTER SHHA - case 0x04BC => 0x04BD // CYRILLIC CAPITAL LETTER ABKHASIAN CHE - case 0x04BE => 0x04BF // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER - case 0x04C0 => 0x04CF // CYRILLIC LETTER PALOCHKA - case 0x04C1 => 0x04C2 // CYRILLIC CAPITAL LETTER ZHE WITH BREVE - case 0x04C3 => 0x04C4 // CYRILLIC CAPITAL LETTER KA WITH HOOK - case 0x04C5 => 0x04C6 // CYRILLIC CAPITAL LETTER EL WITH TAIL - case 0x04C7 => 0x04C8 // CYRILLIC CAPITAL LETTER EN WITH HOOK - case 0x04C9 => 0x04CA // 
CYRILLIC CAPITAL LETTER EN WITH TAIL - case 0x04CB => 0x04CC // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE - case 0x04CD => 0x04CE // CYRILLIC CAPITAL LETTER EM WITH TAIL - case 0x04D0 => 0x04D1 // CYRILLIC CAPITAL LETTER A WITH BREVE - case 0x04D2 => 0x04D3 // CYRILLIC CAPITAL LETTER A WITH DIAERESIS - case 0x04D4 => 0x04D5 // CYRILLIC CAPITAL LIGATURE A IE - case 0x04D6 => 0x04D7 // CYRILLIC CAPITAL LETTER IE WITH BREVE - case 0x04D8 => 0x04D9 // CYRILLIC CAPITAL LETTER SCHWA - case 0x04DA => 0x04DB // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS - case 0x04DC => 0x04DD // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS - case 0x04DE => 0x04DF // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS - case 0x04E0 => 0x04E1 // CYRILLIC CAPITAL LETTER ABKHASIAN DZE - case 0x04E2 => 0x04E3 // CYRILLIC CAPITAL LETTER I WITH MACRON - case 0x04E4 => 0x04E5 // CYRILLIC CAPITAL LETTER I WITH DIAERESIS - case 0x04E6 => 0x04E7 // CYRILLIC CAPITAL LETTER O WITH DIAERESIS - case 0x04E8 => 0x04E9 // CYRILLIC CAPITAL LETTER BARRED O - case 0x04EA => 0x04EB // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS - case 0x04EC => 0x04ED // CYRILLIC CAPITAL LETTER E WITH DIAERESIS - case 0x04EE => 0x04EF // CYRILLIC CAPITAL LETTER U WITH MACRON - case 0x04F0 => 0x04F1 // CYRILLIC CAPITAL LETTER U WITH DIAERESIS - case 0x04F2 => 0x04F3 // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE - case 0x04F4 => 0x04F5 // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS - case 0x04F6 => 0x04F7 // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER - case 0x04F8 => 0x04F9 // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS - case 0x04FA => 0x04FB // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK - case 0x04FC => 0x04FD // CYRILLIC CAPITAL LETTER HA WITH HOOK - case 0x04FE => 0x04FF // CYRILLIC CAPITAL LETTER HA WITH STROKE + case 0x049a => 0x049b // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + case 0x049c => 0x049d // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + case 0x049e => 0x049f // CYRILLIC CAPITAL LETTER KA WITH STROKE + case 
0x04a0 => 0x04a1 // CYRILLIC CAPITAL LETTER BASHKIR KA + case 0x04a2 => 0x04a3 // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + case 0x04a4 => 0x04a5 // CYRILLIC CAPITAL LIGATURE EN GHE + case 0x04a6 => 0x04a7 // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + case 0x04a8 => 0x04a9 // CYRILLIC CAPITAL LETTER ABKHASIAN HA + case 0x04aa => 0x04ab // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + case 0x04ac => 0x04ad // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + case 0x04ae => 0x04af // CYRILLIC CAPITAL LETTER STRAIGHT U + case 0x04b0 => 0x04b1 // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + case 0x04b2 => 0x04b3 // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + case 0x04b4 => 0x04b5 // CYRILLIC CAPITAL LIGATURE TE TSE + case 0x04b6 => 0x04b7 // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + case 0x04b8 => 0x04b9 // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + case 0x04ba => 0x04bb // CYRILLIC CAPITAL LETTER SHHA + case 0x04bc => 0x04bd // CYRILLIC CAPITAL LETTER ABKHASIAN CHE + case 0x04be => 0x04bf // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + case 0x04c0 => 0x04cf // CYRILLIC LETTER PALOCHKA + case 0x04c1 => 0x04c2 // CYRILLIC CAPITAL LETTER ZHE WITH BREVE + case 0x04c3 => 0x04c4 // CYRILLIC CAPITAL LETTER KA WITH HOOK + case 0x04c5 => 0x04c6 // CYRILLIC CAPITAL LETTER EL WITH TAIL + case 0x04c7 => 0x04c8 // CYRILLIC CAPITAL LETTER EN WITH HOOK + case 0x04c9 => 0x04ca // CYRILLIC CAPITAL LETTER EN WITH TAIL + case 0x04cb => 0x04cc // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + case 0x04cd => 0x04ce // CYRILLIC CAPITAL LETTER EM WITH TAIL + case 0x04d0 => 0x04d1 // CYRILLIC CAPITAL LETTER A WITH BREVE + case 0x04d2 => 0x04d3 // CYRILLIC CAPITAL LETTER A WITH DIAERESIS + case 0x04d4 => 0x04d5 // CYRILLIC CAPITAL LIGATURE A IE + case 0x04d6 => 0x04d7 // CYRILLIC CAPITAL LETTER IE WITH BREVE + case 0x04d8 => 0x04d9 // CYRILLIC CAPITAL LETTER SCHWA + case 0x04da => 0x04db // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS + case 0x04dc => 0x04dd // CYRILLIC 
CAPITAL LETTER ZHE WITH DIAERESIS + case 0x04de => 0x04df // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS + case 0x04e0 => 0x04e1 // CYRILLIC CAPITAL LETTER ABKHASIAN DZE + case 0x04e2 => 0x04e3 // CYRILLIC CAPITAL LETTER I WITH MACRON + case 0x04e4 => 0x04e5 // CYRILLIC CAPITAL LETTER I WITH DIAERESIS + case 0x04e6 => 0x04e7 // CYRILLIC CAPITAL LETTER O WITH DIAERESIS + case 0x04e8 => 0x04e9 // CYRILLIC CAPITAL LETTER BARRED O + case 0x04ea => 0x04eb // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS + case 0x04ec => 0x04ed // CYRILLIC CAPITAL LETTER E WITH DIAERESIS + case 0x04ee => 0x04ef // CYRILLIC CAPITAL LETTER U WITH MACRON + case 0x04f0 => 0x04f1 // CYRILLIC CAPITAL LETTER U WITH DIAERESIS + case 0x04f2 => 0x04f3 // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE + case 0x04f4 => 0x04f5 // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS + case 0x04f6 => 0x04f7 // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + case 0x04f8 => 0x04f9 // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS + case 0x04fa => 0x04fb // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + case 0x04fc => 0x04fd // CYRILLIC CAPITAL LETTER HA WITH HOOK + case 0x04fe => 0x04ff // CYRILLIC CAPITAL LETTER HA WITH STROKE case 0x0500 => 0x0501 // CYRILLIC CAPITAL LETTER KOMI DE case 0x0502 => 0x0503 // CYRILLIC CAPITAL LETTER KOMI DJE case 0x0504 => 0x0505 // CYRILLIC CAPITAL LETTER KOMI ZJE case 0x0506 => 0x0507 // CYRILLIC CAPITAL LETTER KOMI DZJE case 0x0508 => 0x0509 // CYRILLIC CAPITAL LETTER KOMI LJE - case 0x050A => 0x050B // CYRILLIC CAPITAL LETTER KOMI NJE - case 0x050C => 0x050D // CYRILLIC CAPITAL LETTER KOMI SJE - case 0x050E => 0x050F // CYRILLIC CAPITAL LETTER KOMI TJE + case 0x050a => 0x050b // CYRILLIC CAPITAL LETTER KOMI NJE + case 0x050c => 0x050d // CYRILLIC CAPITAL LETTER KOMI SJE + case 0x050e => 0x050f // CYRILLIC CAPITAL LETTER KOMI TJE case 0x0510 => 0x0511 // CYRILLIC CAPITAL LETTER REVERSED ZE case 0x0512 => 0x0513 // CYRILLIC CAPITAL LETTER EL WITH HOOK case 0x0514 => 0x0515 // CYRILLIC 
CAPITAL LETTER LHA case 0x0516 => 0x0517 // CYRILLIC CAPITAL LETTER RHA case 0x0518 => 0x0519 // CYRILLIC CAPITAL LETTER YAE - case 0x051A => 0x051B // CYRILLIC CAPITAL LETTER QA - case 0x051C => 0x051D // CYRILLIC CAPITAL LETTER WE - case 0x051E => 0x051F // CYRILLIC CAPITAL LETTER ALEUT KA + case 0x051a => 0x051b // CYRILLIC CAPITAL LETTER QA + case 0x051c => 0x051d // CYRILLIC CAPITAL LETTER WE + case 0x051e => 0x051f // CYRILLIC CAPITAL LETTER ALEUT KA case 0x0520 => 0x0521 // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK case 0x0522 => 0x0523 // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK case 0x0524 => 0x0525 // CYRILLIC CAPITAL LETTER PE WITH DESCENDER case 0x0526 => 0x0527 // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER case 0x0528 => 0x0529 // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK - case 0x052A => 0x052B // CYRILLIC CAPITAL LETTER DZZHE - case 0x052C => 0x052D // CYRILLIC CAPITAL LETTER DCHE - case 0x052E => 0x052F // CYRILLIC CAPITAL LETTER EL WITH DESCENDER + case 0x052a => 0x052b // CYRILLIC CAPITAL LETTER DZZHE + case 0x052c => 0x052d // CYRILLIC CAPITAL LETTER DCHE + case 0x052e => 0x052f // CYRILLIC CAPITAL LETTER EL WITH DESCENDER case 0x0531 => 0x0561 // ARMENIAN CAPITAL LETTER AYB case 0x0532 => 0x0562 // ARMENIAN CAPITAL LETTER BEN case 0x0533 => 0x0563 // ARMENIAN CAPITAL LETTER GIM @@ -664,12 +838,12 @@ private[ci] object CaseFolding { case 0x0537 => 0x0567 // ARMENIAN CAPITAL LETTER EH case 0x0538 => 0x0568 // ARMENIAN CAPITAL LETTER ET case 0x0539 => 0x0569 // ARMENIAN CAPITAL LETTER TO - case 0x053A => 0x056A // ARMENIAN CAPITAL LETTER ZHE - case 0x053B => 0x056B // ARMENIAN CAPITAL LETTER INI - case 0x053C => 0x056C // ARMENIAN CAPITAL LETTER LIWN - case 0x053D => 0x056D // ARMENIAN CAPITAL LETTER XEH - case 0x053E => 0x056E // ARMENIAN CAPITAL LETTER CA - case 0x053F => 0x056F // ARMENIAN CAPITAL LETTER KEN + case 0x053a => 0x056a // ARMENIAN CAPITAL LETTER ZHE + case 0x053b => 0x056b // ARMENIAN CAPITAL LETTER INI + case 0x053c => 0x056c 
// ARMENIAN CAPITAL LETTER LIWN + case 0x053d => 0x056d // ARMENIAN CAPITAL LETTER XEH + case 0x053e => 0x056e // ARMENIAN CAPITAL LETTER CA + case 0x053f => 0x056f // ARMENIAN CAPITAL LETTER KEN case 0x0540 => 0x0570 // ARMENIAN CAPITAL LETTER HO case 0x0541 => 0x0571 // ARMENIAN CAPITAL LETTER JA case 0x0542 => 0x0572 // ARMENIAN CAPITAL LETTER GHAD @@ -680,12 +854,12 @@ private[ci] object CaseFolding { case 0x0547 => 0x0577 // ARMENIAN CAPITAL LETTER SHA case 0x0548 => 0x0578 // ARMENIAN CAPITAL LETTER VO case 0x0549 => 0x0579 // ARMENIAN CAPITAL LETTER CHA - case 0x054A => 0x057A // ARMENIAN CAPITAL LETTER PEH - case 0x054B => 0x057B // ARMENIAN CAPITAL LETTER JHEH - case 0x054C => 0x057C // ARMENIAN CAPITAL LETTER RA - case 0x054D => 0x057D // ARMENIAN CAPITAL LETTER SEH - case 0x054E => 0x057E // ARMENIAN CAPITAL LETTER VEW - case 0x054F => 0x057F // ARMENIAN CAPITAL LETTER TIWN + case 0x054a => 0x057a // ARMENIAN CAPITAL LETTER PEH + case 0x054b => 0x057b // ARMENIAN CAPITAL LETTER JHEH + case 0x054c => 0x057c // ARMENIAN CAPITAL LETTER RA + case 0x054d => 0x057d // ARMENIAN CAPITAL LETTER SEH + case 0x054e => 0x057e // ARMENIAN CAPITAL LETTER VEW + case 0x054f => 0x057f // ARMENIAN CAPITAL LETTER TIWN case 0x0550 => 0x0580 // ARMENIAN CAPITAL LETTER REH case 0x0551 => 0x0581 // ARMENIAN CAPITAL LETTER CO case 0x0552 => 0x0582 // ARMENIAN CAPITAL LETTER YIWN @@ -693,305 +867,305 @@ private[ci] object CaseFolding { case 0x0554 => 0x0584 // ARMENIAN CAPITAL LETTER KEH case 0x0555 => 0x0585 // ARMENIAN CAPITAL LETTER OH case 0x0556 => 0x0586 // ARMENIAN CAPITAL LETTER FEH - case 0x10A0 => 0x2D00 // GEORGIAN CAPITAL LETTER AN - case 0x10A1 => 0x2D01 // GEORGIAN CAPITAL LETTER BAN - case 0x10A2 => 0x2D02 // GEORGIAN CAPITAL LETTER GAN - case 0x10A3 => 0x2D03 // GEORGIAN CAPITAL LETTER DON - case 0x10A4 => 0x2D04 // GEORGIAN CAPITAL LETTER EN - case 0x10A5 => 0x2D05 // GEORGIAN CAPITAL LETTER VIN - case 0x10A6 => 0x2D06 // GEORGIAN CAPITAL LETTER ZEN - case 0x10A7 
=> 0x2D07 // GEORGIAN CAPITAL LETTER TAN - case 0x10A8 => 0x2D08 // GEORGIAN CAPITAL LETTER IN - case 0x10A9 => 0x2D09 // GEORGIAN CAPITAL LETTER KAN - case 0x10AA => 0x2D0A // GEORGIAN CAPITAL LETTER LAS - case 0x10AB => 0x2D0B // GEORGIAN CAPITAL LETTER MAN - case 0x10AC => 0x2D0C // GEORGIAN CAPITAL LETTER NAR - case 0x10AD => 0x2D0D // GEORGIAN CAPITAL LETTER ON - case 0x10AE => 0x2D0E // GEORGIAN CAPITAL LETTER PAR - case 0x10AF => 0x2D0F // GEORGIAN CAPITAL LETTER ZHAR - case 0x10B0 => 0x2D10 // GEORGIAN CAPITAL LETTER RAE - case 0x10B1 => 0x2D11 // GEORGIAN CAPITAL LETTER SAN - case 0x10B2 => 0x2D12 // GEORGIAN CAPITAL LETTER TAR - case 0x10B3 => 0x2D13 // GEORGIAN CAPITAL LETTER UN - case 0x10B4 => 0x2D14 // GEORGIAN CAPITAL LETTER PHAR - case 0x10B5 => 0x2D15 // GEORGIAN CAPITAL LETTER KHAR - case 0x10B6 => 0x2D16 // GEORGIAN CAPITAL LETTER GHAN - case 0x10B7 => 0x2D17 // GEORGIAN CAPITAL LETTER QAR - case 0x10B8 => 0x2D18 // GEORGIAN CAPITAL LETTER SHIN - case 0x10B9 => 0x2D19 // GEORGIAN CAPITAL LETTER CHIN - case 0x10BA => 0x2D1A // GEORGIAN CAPITAL LETTER CAN - case 0x10BB => 0x2D1B // GEORGIAN CAPITAL LETTER JIL - case 0x10BC => 0x2D1C // GEORGIAN CAPITAL LETTER CIL - case 0x10BD => 0x2D1D // GEORGIAN CAPITAL LETTER CHAR - case 0x10BE => 0x2D1E // GEORGIAN CAPITAL LETTER XAN - case 0x10BF => 0x2D1F // GEORGIAN CAPITAL LETTER JHAN - case 0x10C0 => 0x2D20 // GEORGIAN CAPITAL LETTER HAE - case 0x10C1 => 0x2D21 // GEORGIAN CAPITAL LETTER HE - case 0x10C2 => 0x2D22 // GEORGIAN CAPITAL LETTER HIE - case 0x10C3 => 0x2D23 // GEORGIAN CAPITAL LETTER WE - case 0x10C4 => 0x2D24 // GEORGIAN CAPITAL LETTER HAR - case 0x10C5 => 0x2D25 // GEORGIAN CAPITAL LETTER HOE - case 0x10C7 => 0x2D27 // GEORGIAN CAPITAL LETTER YN - case 0x10CD => 0x2D2D // GEORGIAN CAPITAL LETTER AEN - case 0x13F8 => 0x13F0 // CHEROKEE SMALL LETTER YE - case 0x13F9 => 0x13F1 // CHEROKEE SMALL LETTER YI - case 0x13FA => 0x13F2 // CHEROKEE SMALL LETTER YO - case 0x13FB => 0x13F3 // CHEROKEE 
SMALL LETTER YU - case 0x13FC => 0x13F4 // CHEROKEE SMALL LETTER YV - case 0x13FD => 0x13F5 // CHEROKEE SMALL LETTER MV - case 0x1C80 => 0x0432 // CYRILLIC SMALL LETTER ROUNDED VE - case 0x1C81 => 0x0434 // CYRILLIC SMALL LETTER LONG-LEGGED DE - case 0x1C82 => 0x043E // CYRILLIC SMALL LETTER NARROW O - case 0x1C83 => 0x0441 // CYRILLIC SMALL LETTER WIDE ES - case 0x1C84 => 0x0442 // CYRILLIC SMALL LETTER TALL TE - case 0x1C85 => 0x0442 // CYRILLIC SMALL LETTER THREE-LEGGED TE - case 0x1C86 => 0x044A // CYRILLIC SMALL LETTER TALL HARD SIGN - case 0x1C87 => 0x0463 // CYRILLIC SMALL LETTER TALL YAT - case 0x1C88 => 0xA64B // CYRILLIC SMALL LETTER UNBLENDED UK - case 0x1C90 => 0x10D0 // GEORGIAN MTAVRULI CAPITAL LETTER AN - case 0x1C91 => 0x10D1 // GEORGIAN MTAVRULI CAPITAL LETTER BAN - case 0x1C92 => 0x10D2 // GEORGIAN MTAVRULI CAPITAL LETTER GAN - case 0x1C93 => 0x10D3 // GEORGIAN MTAVRULI CAPITAL LETTER DON - case 0x1C94 => 0x10D4 // GEORGIAN MTAVRULI CAPITAL LETTER EN - case 0x1C95 => 0x10D5 // GEORGIAN MTAVRULI CAPITAL LETTER VIN - case 0x1C96 => 0x10D6 // GEORGIAN MTAVRULI CAPITAL LETTER ZEN - case 0x1C97 => 0x10D7 // GEORGIAN MTAVRULI CAPITAL LETTER TAN - case 0x1C98 => 0x10D8 // GEORGIAN MTAVRULI CAPITAL LETTER IN - case 0x1C99 => 0x10D9 // GEORGIAN MTAVRULI CAPITAL LETTER KAN - case 0x1C9A => 0x10DA // GEORGIAN MTAVRULI CAPITAL LETTER LAS - case 0x1C9B => 0x10DB // GEORGIAN MTAVRULI CAPITAL LETTER MAN - case 0x1C9C => 0x10DC // GEORGIAN MTAVRULI CAPITAL LETTER NAR - case 0x1C9D => 0x10DD // GEORGIAN MTAVRULI CAPITAL LETTER ON - case 0x1C9E => 0x10DE // GEORGIAN MTAVRULI CAPITAL LETTER PAR - case 0x1C9F => 0x10DF // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR - case 0x1CA0 => 0x10E0 // GEORGIAN MTAVRULI CAPITAL LETTER RAE - case 0x1CA1 => 0x10E1 // GEORGIAN MTAVRULI CAPITAL LETTER SAN - case 0x1CA2 => 0x10E2 // GEORGIAN MTAVRULI CAPITAL LETTER TAR - case 0x1CA3 => 0x10E3 // GEORGIAN MTAVRULI CAPITAL LETTER UN - case 0x1CA4 => 0x10E4 // GEORGIAN MTAVRULI CAPITAL 
LETTER PHAR - case 0x1CA5 => 0x10E5 // GEORGIAN MTAVRULI CAPITAL LETTER KHAR - case 0x1CA6 => 0x10E6 // GEORGIAN MTAVRULI CAPITAL LETTER GHAN - case 0x1CA7 => 0x10E7 // GEORGIAN MTAVRULI CAPITAL LETTER QAR - case 0x1CA8 => 0x10E8 // GEORGIAN MTAVRULI CAPITAL LETTER SHIN - case 0x1CA9 => 0x10E9 // GEORGIAN MTAVRULI CAPITAL LETTER CHIN - case 0x1CAA => 0x10EA // GEORGIAN MTAVRULI CAPITAL LETTER CAN - case 0x1CAB => 0x10EB // GEORGIAN MTAVRULI CAPITAL LETTER JIL - case 0x1CAC => 0x10EC // GEORGIAN MTAVRULI CAPITAL LETTER CIL - case 0x1CAD => 0x10ED // GEORGIAN MTAVRULI CAPITAL LETTER CHAR - case 0x1CAE => 0x10EE // GEORGIAN MTAVRULI CAPITAL LETTER XAN - case 0x1CAF => 0x10EF // GEORGIAN MTAVRULI CAPITAL LETTER JHAN - case 0x1CB0 => 0x10F0 // GEORGIAN MTAVRULI CAPITAL LETTER HAE - case 0x1CB1 => 0x10F1 // GEORGIAN MTAVRULI CAPITAL LETTER HE - case 0x1CB2 => 0x10F2 // GEORGIAN MTAVRULI CAPITAL LETTER HIE - case 0x1CB3 => 0x10F3 // GEORGIAN MTAVRULI CAPITAL LETTER WE - case 0x1CB4 => 0x10F4 // GEORGIAN MTAVRULI CAPITAL LETTER HAR - case 0x1CB5 => 0x10F5 // GEORGIAN MTAVRULI CAPITAL LETTER HOE - case 0x1CB6 => 0x10F6 // GEORGIAN MTAVRULI CAPITAL LETTER FI - case 0x1CB7 => 0x10F7 // GEORGIAN MTAVRULI CAPITAL LETTER YN - case 0x1CB8 => 0x10F8 // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI - case 0x1CB9 => 0x10F9 // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN - case 0x1CBA => 0x10FA // GEORGIAN MTAVRULI CAPITAL LETTER AIN - case 0x1CBD => 0x10FD // GEORGIAN MTAVRULI CAPITAL LETTER AEN - case 0x1CBE => 0x10FE // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN - case 0x1CBF => 0x10FF // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN - case 0x1E00 => 0x1E01 // LATIN CAPITAL LETTER A WITH RING BELOW - case 0x1E02 => 0x1E03 // LATIN CAPITAL LETTER B WITH DOT ABOVE - case 0x1E04 => 0x1E05 // LATIN CAPITAL LETTER B WITH DOT BELOW - case 0x1E06 => 0x1E07 // LATIN CAPITAL LETTER B WITH LINE BELOW - case 0x1E08 => 0x1E09 // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE - case 0x1E0A => 0x1E0B // 
LATIN CAPITAL LETTER D WITH DOT ABOVE - case 0x1E0C => 0x1E0D // LATIN CAPITAL LETTER D WITH DOT BELOW - case 0x1E0E => 0x1E0F // LATIN CAPITAL LETTER D WITH LINE BELOW - case 0x1E10 => 0x1E11 // LATIN CAPITAL LETTER D WITH CEDILLA - case 0x1E12 => 0x1E13 // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW - case 0x1E14 => 0x1E15 // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE - case 0x1E16 => 0x1E17 // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE - case 0x1E18 => 0x1E19 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW - case 0x1E1A => 0x1E1B // LATIN CAPITAL LETTER E WITH TILDE BELOW - case 0x1E1C => 0x1E1D // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE - case 0x1E1E => 0x1E1F // LATIN CAPITAL LETTER F WITH DOT ABOVE - case 0x1E20 => 0x1E21 // LATIN CAPITAL LETTER G WITH MACRON - case 0x1E22 => 0x1E23 // LATIN CAPITAL LETTER H WITH DOT ABOVE - case 0x1E24 => 0x1E25 // LATIN CAPITAL LETTER H WITH DOT BELOW - case 0x1E26 => 0x1E27 // LATIN CAPITAL LETTER H WITH DIAERESIS - case 0x1E28 => 0x1E29 // LATIN CAPITAL LETTER H WITH CEDILLA - case 0x1E2A => 0x1E2B // LATIN CAPITAL LETTER H WITH BREVE BELOW - case 0x1E2C => 0x1E2D // LATIN CAPITAL LETTER I WITH TILDE BELOW - case 0x1E2E => 0x1E2F // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE - case 0x1E30 => 0x1E31 // LATIN CAPITAL LETTER K WITH ACUTE - case 0x1E32 => 0x1E33 // LATIN CAPITAL LETTER K WITH DOT BELOW - case 0x1E34 => 0x1E35 // LATIN CAPITAL LETTER K WITH LINE BELOW - case 0x1E36 => 0x1E37 // LATIN CAPITAL LETTER L WITH DOT BELOW - case 0x1E38 => 0x1E39 // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON - case 0x1E3A => 0x1E3B // LATIN CAPITAL LETTER L WITH LINE BELOW - case 0x1E3C => 0x1E3D // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW - case 0x1E3E => 0x1E3F // LATIN CAPITAL LETTER M WITH ACUTE - case 0x1E40 => 0x1E41 // LATIN CAPITAL LETTER M WITH DOT ABOVE - case 0x1E42 => 0x1E43 // LATIN CAPITAL LETTER M WITH DOT BELOW - case 0x1E44 => 0x1E45 // LATIN CAPITAL LETTER N WITH DOT ABOVE - case 0x1E46 => 
0x1E47 // LATIN CAPITAL LETTER N WITH DOT BELOW - case 0x1E48 => 0x1E49 // LATIN CAPITAL LETTER N WITH LINE BELOW - case 0x1E4A => 0x1E4B // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW - case 0x1E4C => 0x1E4D // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE - case 0x1E4E => 0x1E4F // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS - case 0x1E50 => 0x1E51 // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE - case 0x1E52 => 0x1E53 // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE - case 0x1E54 => 0x1E55 // LATIN CAPITAL LETTER P WITH ACUTE - case 0x1E56 => 0x1E57 // LATIN CAPITAL LETTER P WITH DOT ABOVE - case 0x1E58 => 0x1E59 // LATIN CAPITAL LETTER R WITH DOT ABOVE - case 0x1E5A => 0x1E5B // LATIN CAPITAL LETTER R WITH DOT BELOW - case 0x1E5C => 0x1E5D // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON - case 0x1E5E => 0x1E5F // LATIN CAPITAL LETTER R WITH LINE BELOW - case 0x1E60 => 0x1E61 // LATIN CAPITAL LETTER S WITH DOT ABOVE - case 0x1E62 => 0x1E63 // LATIN CAPITAL LETTER S WITH DOT BELOW - case 0x1E64 => 0x1E65 // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE - case 0x1E66 => 0x1E67 // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE - case 0x1E68 => 0x1E69 // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE - case 0x1E6A => 0x1E6B // LATIN CAPITAL LETTER T WITH DOT ABOVE - case 0x1E6C => 0x1E6D // LATIN CAPITAL LETTER T WITH DOT BELOW - case 0x1E6E => 0x1E6F // LATIN CAPITAL LETTER T WITH LINE BELOW - case 0x1E70 => 0x1E71 // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW - case 0x1E72 => 0x1E73 // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW - case 0x1E74 => 0x1E75 // LATIN CAPITAL LETTER U WITH TILDE BELOW - case 0x1E76 => 0x1E77 // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW - case 0x1E78 => 0x1E79 // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE - case 0x1E7A => 0x1E7B // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS - case 0x1E7C => 0x1E7D // LATIN CAPITAL LETTER V WITH TILDE - case 0x1E7E => 0x1E7F // LATIN CAPITAL LETTER V WITH DOT BELOW - case 0x1E80 
=> 0x1E81 // LATIN CAPITAL LETTER W WITH GRAVE - case 0x1E82 => 0x1E83 // LATIN CAPITAL LETTER W WITH ACUTE - case 0x1E84 => 0x1E85 // LATIN CAPITAL LETTER W WITH DIAERESIS - case 0x1E86 => 0x1E87 // LATIN CAPITAL LETTER W WITH DOT ABOVE - case 0x1E88 => 0x1E89 // LATIN CAPITAL LETTER W WITH DOT BELOW - case 0x1E8A => 0x1E8B // LATIN CAPITAL LETTER X WITH DOT ABOVE - case 0x1E8C => 0x1E8D // LATIN CAPITAL LETTER X WITH DIAERESIS - case 0x1E8E => 0x1E8F // LATIN CAPITAL LETTER Y WITH DOT ABOVE - case 0x1E90 => 0x1E91 // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX - case 0x1E92 => 0x1E93 // LATIN CAPITAL LETTER Z WITH DOT BELOW - case 0x1E94 => 0x1E95 // LATIN CAPITAL LETTER Z WITH LINE BELOW - case 0x1E9B => 0x1E61 // LATIN SMALL LETTER LONG S WITH DOT ABOVE - case 0x1EA0 => 0x1EA1 // LATIN CAPITAL LETTER A WITH DOT BELOW - case 0x1EA2 => 0x1EA3 // LATIN CAPITAL LETTER A WITH HOOK ABOVE - case 0x1EA4 => 0x1EA5 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE - case 0x1EA6 => 0x1EA7 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE - case 0x1EA8 => 0x1EA9 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1EAA => 0x1EAB // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE - case 0x1EAC => 0x1EAD // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW - case 0x1EAE => 0x1EAF // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE - case 0x1EB0 => 0x1EB1 // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE - case 0x1EB2 => 0x1EB3 // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE - case 0x1EB4 => 0x1EB5 // LATIN CAPITAL LETTER A WITH BREVE AND TILDE - case 0x1EB6 => 0x1EB7 // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW - case 0x1EB8 => 0x1EB9 // LATIN CAPITAL LETTER E WITH DOT BELOW - case 0x1EBA => 0x1EBB // LATIN CAPITAL LETTER E WITH HOOK ABOVE - case 0x1EBC => 0x1EBD // LATIN CAPITAL LETTER E WITH TILDE - case 0x1EBE => 0x1EBF // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE - case 0x1EC0 => 0x1EC1 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE - case 
0x1EC2 => 0x1EC3 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1EC4 => 0x1EC5 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE - case 0x1EC6 => 0x1EC7 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW - case 0x1EC8 => 0x1EC9 // LATIN CAPITAL LETTER I WITH HOOK ABOVE - case 0x1ECA => 0x1ECB // LATIN CAPITAL LETTER I WITH DOT BELOW - case 0x1ECC => 0x1ECD // LATIN CAPITAL LETTER O WITH DOT BELOW - case 0x1ECE => 0x1ECF // LATIN CAPITAL LETTER O WITH HOOK ABOVE - case 0x1ED0 => 0x1ED1 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE - case 0x1ED2 => 0x1ED3 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE - case 0x1ED4 => 0x1ED5 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE - case 0x1ED6 => 0x1ED7 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE - case 0x1ED8 => 0x1ED9 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW - case 0x1EDA => 0x1EDB // LATIN CAPITAL LETTER O WITH HORN AND ACUTE - case 0x1EDC => 0x1EDD // LATIN CAPITAL LETTER O WITH HORN AND GRAVE - case 0x1EDE => 0x1EDF // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE - case 0x1EE0 => 0x1EE1 // LATIN CAPITAL LETTER O WITH HORN AND TILDE - case 0x1EE2 => 0x1EE3 // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW - case 0x1EE4 => 0x1EE5 // LATIN CAPITAL LETTER U WITH DOT BELOW - case 0x1EE6 => 0x1EE7 // LATIN CAPITAL LETTER U WITH HOOK ABOVE - case 0x1EE8 => 0x1EE9 // LATIN CAPITAL LETTER U WITH HORN AND ACUTE - case 0x1EEA => 0x1EEB // LATIN CAPITAL LETTER U WITH HORN AND GRAVE - case 0x1EEC => 0x1EED // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE - case 0x1EEE => 0x1EEF // LATIN CAPITAL LETTER U WITH HORN AND TILDE - case 0x1EF0 => 0x1EF1 // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW - case 0x1EF2 => 0x1EF3 // LATIN CAPITAL LETTER Y WITH GRAVE - case 0x1EF4 => 0x1EF5 // LATIN CAPITAL LETTER Y WITH DOT BELOW - case 0x1EF6 => 0x1EF7 // LATIN CAPITAL LETTER Y WITH HOOK ABOVE - case 0x1EF8 => 0x1EF9 // LATIN CAPITAL LETTER Y WITH TILDE - case 0x1EFA => 
0x1EFB // LATIN CAPITAL LETTER MIDDLE-WELSH LL - case 0x1EFC => 0x1EFD // LATIN CAPITAL LETTER MIDDLE-WELSH V - case 0x1EFE => 0x1EFF // LATIN CAPITAL LETTER Y WITH LOOP - case 0x1F08 => 0x1F00 // GREEK CAPITAL LETTER ALPHA WITH PSILI - case 0x1F09 => 0x1F01 // GREEK CAPITAL LETTER ALPHA WITH DASIA - case 0x1F0A => 0x1F02 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA - case 0x1F0B => 0x1F03 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA - case 0x1F0C => 0x1F04 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA - case 0x1F0D => 0x1F05 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA - case 0x1F0E => 0x1F06 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI - case 0x1F0F => 0x1F07 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI - case 0x1F18 => 0x1F10 // GREEK CAPITAL LETTER EPSILON WITH PSILI - case 0x1F19 => 0x1F11 // GREEK CAPITAL LETTER EPSILON WITH DASIA - case 0x1F1A => 0x1F12 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA - case 0x1F1B => 0x1F13 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA - case 0x1F1C => 0x1F14 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA - case 0x1F1D => 0x1F15 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA - case 0x1F28 => 0x1F20 // GREEK CAPITAL LETTER ETA WITH PSILI - case 0x1F29 => 0x1F21 // GREEK CAPITAL LETTER ETA WITH DASIA - case 0x1F2A => 0x1F22 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA - case 0x1F2B => 0x1F23 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA - case 0x1F2C => 0x1F24 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA - case 0x1F2D => 0x1F25 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA - case 0x1F2E => 0x1F26 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI - case 0x1F2F => 0x1F27 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI - case 0x1F38 => 0x1F30 // GREEK CAPITAL LETTER IOTA WITH PSILI - case 0x1F39 => 0x1F31 // GREEK CAPITAL LETTER IOTA WITH DASIA - case 0x1F3A => 0x1F32 // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA - case 0x1F3B => 0x1F33 
// GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA - case 0x1F3C => 0x1F34 // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA - case 0x1F3D => 0x1F35 // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA - case 0x1F3E => 0x1F36 // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI - case 0x1F3F => 0x1F37 // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI - case 0x1F48 => 0x1F40 // GREEK CAPITAL LETTER OMICRON WITH PSILI - case 0x1F49 => 0x1F41 // GREEK CAPITAL LETTER OMICRON WITH DASIA - case 0x1F4A => 0x1F42 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA - case 0x1F4B => 0x1F43 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA - case 0x1F4C => 0x1F44 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA - case 0x1F4D => 0x1F45 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA - case 0x1F59 => 0x1F51 // GREEK CAPITAL LETTER UPSILON WITH DASIA - case 0x1F5B => 0x1F53 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA - case 0x1F5D => 0x1F55 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA - case 0x1F5F => 0x1F57 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI - case 0x1F68 => 0x1F60 // GREEK CAPITAL LETTER OMEGA WITH PSILI - case 0x1F69 => 0x1F61 // GREEK CAPITAL LETTER OMEGA WITH DASIA - case 0x1F6A => 0x1F62 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA - case 0x1F6B => 0x1F63 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA - case 0x1F6C => 0x1F64 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA - case 0x1F6D => 0x1F65 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA - case 0x1F6E => 0x1F66 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI - case 0x1F6F => 0x1F67 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI - case 0x1FB8 => 0x1FB0 // GREEK CAPITAL LETTER ALPHA WITH VRACHY - case 0x1FB9 => 0x1FB1 // GREEK CAPITAL LETTER ALPHA WITH MACRON - case 0x1FBA => 0x1F70 // GREEK CAPITAL LETTER ALPHA WITH VARIA - case 0x1FBB => 0x1F71 // GREEK CAPITAL LETTER ALPHA WITH OXIA - case 0x1FBE => 0x03B9 // GREEK PROSGEGRAMMENI - 
case 0x1FC8 => 0x1F72 // GREEK CAPITAL LETTER EPSILON WITH VARIA - case 0x1FC9 => 0x1F73 // GREEK CAPITAL LETTER EPSILON WITH OXIA - case 0x1FCA => 0x1F74 // GREEK CAPITAL LETTER ETA WITH VARIA - case 0x1FCB => 0x1F75 // GREEK CAPITAL LETTER ETA WITH OXIA - case 0x1FD8 => 0x1FD0 // GREEK CAPITAL LETTER IOTA WITH VRACHY - case 0x1FD9 => 0x1FD1 // GREEK CAPITAL LETTER IOTA WITH MACRON - case 0x1FDA => 0x1F76 // GREEK CAPITAL LETTER IOTA WITH VARIA - case 0x1FDB => 0x1F77 // GREEK CAPITAL LETTER IOTA WITH OXIA - case 0x1FE8 => 0x1FE0 // GREEK CAPITAL LETTER UPSILON WITH VRACHY - case 0x1FE9 => 0x1FE1 // GREEK CAPITAL LETTER UPSILON WITH MACRON - case 0x1FEA => 0x1F7A // GREEK CAPITAL LETTER UPSILON WITH VARIA - case 0x1FEB => 0x1F7B // GREEK CAPITAL LETTER UPSILON WITH OXIA - case 0x1FEC => 0x1FE5 // GREEK CAPITAL LETTER RHO WITH DASIA - case 0x1FF8 => 0x1F78 // GREEK CAPITAL LETTER OMICRON WITH VARIA - case 0x1FF9 => 0x1F79 // GREEK CAPITAL LETTER OMICRON WITH OXIA - case 0x1FFA => 0x1F7C // GREEK CAPITAL LETTER OMEGA WITH VARIA - case 0x1FFB => 0x1F7D // GREEK CAPITAL LETTER OMEGA WITH OXIA - case 0x2126 => 0x03C9 // OHM SIGN - case 0x212A => 0x006B // KELVIN SIGN - case 0x212B => 0x00E5 // ANGSTROM SIGN - case 0x2132 => 0x214E // TURNED CAPITAL F + case 0x10a0 => 0x2d00 // GEORGIAN CAPITAL LETTER AN + case 0x10a1 => 0x2d01 // GEORGIAN CAPITAL LETTER BAN + case 0x10a2 => 0x2d02 // GEORGIAN CAPITAL LETTER GAN + case 0x10a3 => 0x2d03 // GEORGIAN CAPITAL LETTER DON + case 0x10a4 => 0x2d04 // GEORGIAN CAPITAL LETTER EN + case 0x10a5 => 0x2d05 // GEORGIAN CAPITAL LETTER VIN + case 0x10a6 => 0x2d06 // GEORGIAN CAPITAL LETTER ZEN + case 0x10a7 => 0x2d07 // GEORGIAN CAPITAL LETTER TAN + case 0x10a8 => 0x2d08 // GEORGIAN CAPITAL LETTER IN + case 0x10a9 => 0x2d09 // GEORGIAN CAPITAL LETTER KAN + case 0x10aa => 0x2d0a // GEORGIAN CAPITAL LETTER LAS + case 0x10ab => 0x2d0b // GEORGIAN CAPITAL LETTER MAN + case 0x10ac => 0x2d0c // GEORGIAN CAPITAL LETTER NAR + case 0x10ad => 
0x2d0d // GEORGIAN CAPITAL LETTER ON + case 0x10ae => 0x2d0e // GEORGIAN CAPITAL LETTER PAR + case 0x10af => 0x2d0f // GEORGIAN CAPITAL LETTER ZHAR + case 0x10b0 => 0x2d10 // GEORGIAN CAPITAL LETTER RAE + case 0x10b1 => 0x2d11 // GEORGIAN CAPITAL LETTER SAN + case 0x10b2 => 0x2d12 // GEORGIAN CAPITAL LETTER TAR + case 0x10b3 => 0x2d13 // GEORGIAN CAPITAL LETTER UN + case 0x10b4 => 0x2d14 // GEORGIAN CAPITAL LETTER PHAR + case 0x10b5 => 0x2d15 // GEORGIAN CAPITAL LETTER KHAR + case 0x10b6 => 0x2d16 // GEORGIAN CAPITAL LETTER GHAN + case 0x10b7 => 0x2d17 // GEORGIAN CAPITAL LETTER QAR + case 0x10b8 => 0x2d18 // GEORGIAN CAPITAL LETTER SHIN + case 0x10b9 => 0x2d19 // GEORGIAN CAPITAL LETTER CHIN + case 0x10ba => 0x2d1a // GEORGIAN CAPITAL LETTER CAN + case 0x10bb => 0x2d1b // GEORGIAN CAPITAL LETTER JIL + case 0x10bc => 0x2d1c // GEORGIAN CAPITAL LETTER CIL + case 0x10bd => 0x2d1d // GEORGIAN CAPITAL LETTER CHAR + case 0x10be => 0x2d1e // GEORGIAN CAPITAL LETTER XAN + case 0x10bf => 0x2d1f // GEORGIAN CAPITAL LETTER JHAN + case 0x10c0 => 0x2d20 // GEORGIAN CAPITAL LETTER HAE + case 0x10c1 => 0x2d21 // GEORGIAN CAPITAL LETTER HE + case 0x10c2 => 0x2d22 // GEORGIAN CAPITAL LETTER HIE + case 0x10c3 => 0x2d23 // GEORGIAN CAPITAL LETTER WE + case 0x10c4 => 0x2d24 // GEORGIAN CAPITAL LETTER HAR + case 0x10c5 => 0x2d25 // GEORGIAN CAPITAL LETTER HOE + case 0x10c7 => 0x2d27 // GEORGIAN CAPITAL LETTER YN + case 0x10cd => 0x2d2d // GEORGIAN CAPITAL LETTER AEN + case 0x13f8 => 0x13f0 // CHEROKEE SMALL LETTER YE + case 0x13f9 => 0x13f1 // CHEROKEE SMALL LETTER YI + case 0x13fa => 0x13f2 // CHEROKEE SMALL LETTER YO + case 0x13fb => 0x13f3 // CHEROKEE SMALL LETTER YU + case 0x13fc => 0x13f4 // CHEROKEE SMALL LETTER YV + case 0x13fd => 0x13f5 // CHEROKEE SMALL LETTER MV + case 0x1c80 => 0x0432 // CYRILLIC SMALL LETTER ROUNDED VE + case 0x1c81 => 0x0434 // CYRILLIC SMALL LETTER LONG-LEGGED DE + case 0x1c82 => 0x043e // CYRILLIC SMALL LETTER NARROW O + case 0x1c83 => 0x0441 // 
CYRILLIC SMALL LETTER WIDE ES + case 0x1c84 => 0x0442 // CYRILLIC SMALL LETTER TALL TE + case 0x1c85 => 0x0442 // CYRILLIC SMALL LETTER THREE-LEGGED TE + case 0x1c86 => 0x044a // CYRILLIC SMALL LETTER TALL HARD SIGN + case 0x1c87 => 0x0463 // CYRILLIC SMALL LETTER TALL YAT + case 0x1c88 => 0xa64b // CYRILLIC SMALL LETTER UNBLENDED UK + case 0x1c90 => 0x10d0 // GEORGIAN MTAVRULI CAPITAL LETTER AN + case 0x1c91 => 0x10d1 // GEORGIAN MTAVRULI CAPITAL LETTER BAN + case 0x1c92 => 0x10d2 // GEORGIAN MTAVRULI CAPITAL LETTER GAN + case 0x1c93 => 0x10d3 // GEORGIAN MTAVRULI CAPITAL LETTER DON + case 0x1c94 => 0x10d4 // GEORGIAN MTAVRULI CAPITAL LETTER EN + case 0x1c95 => 0x10d5 // GEORGIAN MTAVRULI CAPITAL LETTER VIN + case 0x1c96 => 0x10d6 // GEORGIAN MTAVRULI CAPITAL LETTER ZEN + case 0x1c97 => 0x10d7 // GEORGIAN MTAVRULI CAPITAL LETTER TAN + case 0x1c98 => 0x10d8 // GEORGIAN MTAVRULI CAPITAL LETTER IN + case 0x1c99 => 0x10d9 // GEORGIAN MTAVRULI CAPITAL LETTER KAN + case 0x1c9a => 0x10da // GEORGIAN MTAVRULI CAPITAL LETTER LAS + case 0x1c9b => 0x10db // GEORGIAN MTAVRULI CAPITAL LETTER MAN + case 0x1c9c => 0x10dc // GEORGIAN MTAVRULI CAPITAL LETTER NAR + case 0x1c9d => 0x10dd // GEORGIAN MTAVRULI CAPITAL LETTER ON + case 0x1c9e => 0x10de // GEORGIAN MTAVRULI CAPITAL LETTER PAR + case 0x1c9f => 0x10df // GEORGIAN MTAVRULI CAPITAL LETTER ZHAR + case 0x1ca0 => 0x10e0 // GEORGIAN MTAVRULI CAPITAL LETTER RAE + case 0x1ca1 => 0x10e1 // GEORGIAN MTAVRULI CAPITAL LETTER SAN + case 0x1ca2 => 0x10e2 // GEORGIAN MTAVRULI CAPITAL LETTER TAR + case 0x1ca3 => 0x10e3 // GEORGIAN MTAVRULI CAPITAL LETTER UN + case 0x1ca4 => 0x10e4 // GEORGIAN MTAVRULI CAPITAL LETTER PHAR + case 0x1ca5 => 0x10e5 // GEORGIAN MTAVRULI CAPITAL LETTER KHAR + case 0x1ca6 => 0x10e6 // GEORGIAN MTAVRULI CAPITAL LETTER GHAN + case 0x1ca7 => 0x10e7 // GEORGIAN MTAVRULI CAPITAL LETTER QAR + case 0x1ca8 => 0x10e8 // GEORGIAN MTAVRULI CAPITAL LETTER SHIN + case 0x1ca9 => 0x10e9 // GEORGIAN MTAVRULI CAPITAL LETTER 
CHIN + case 0x1caa => 0x10ea // GEORGIAN MTAVRULI CAPITAL LETTER CAN + case 0x1cab => 0x10eb // GEORGIAN MTAVRULI CAPITAL LETTER JIL + case 0x1cac => 0x10ec // GEORGIAN MTAVRULI CAPITAL LETTER CIL + case 0x1cad => 0x10ed // GEORGIAN MTAVRULI CAPITAL LETTER CHAR + case 0x1cae => 0x10ee // GEORGIAN MTAVRULI CAPITAL LETTER XAN + case 0x1caf => 0x10ef // GEORGIAN MTAVRULI CAPITAL LETTER JHAN + case 0x1cb0 => 0x10f0 // GEORGIAN MTAVRULI CAPITAL LETTER HAE + case 0x1cb1 => 0x10f1 // GEORGIAN MTAVRULI CAPITAL LETTER HE + case 0x1cb2 => 0x10f2 // GEORGIAN MTAVRULI CAPITAL LETTER HIE + case 0x1cb3 => 0x10f3 // GEORGIAN MTAVRULI CAPITAL LETTER WE + case 0x1cb4 => 0x10f4 // GEORGIAN MTAVRULI CAPITAL LETTER HAR + case 0x1cb5 => 0x10f5 // GEORGIAN MTAVRULI CAPITAL LETTER HOE + case 0x1cb6 => 0x10f6 // GEORGIAN MTAVRULI CAPITAL LETTER FI + case 0x1cb7 => 0x10f7 // GEORGIAN MTAVRULI CAPITAL LETTER YN + case 0x1cb8 => 0x10f8 // GEORGIAN MTAVRULI CAPITAL LETTER ELIFI + case 0x1cb9 => 0x10f9 // GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN + case 0x1cba => 0x10fa // GEORGIAN MTAVRULI CAPITAL LETTER AIN + case 0x1cbd => 0x10fd // GEORGIAN MTAVRULI CAPITAL LETTER AEN + case 0x1cbe => 0x10fe // GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN + case 0x1cbf => 0x10ff // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN + case 0x1e00 => 0x1e01 // LATIN CAPITAL LETTER A WITH RING BELOW + case 0x1e02 => 0x1e03 // LATIN CAPITAL LETTER B WITH DOT ABOVE + case 0x1e04 => 0x1e05 // LATIN CAPITAL LETTER B WITH DOT BELOW + case 0x1e06 => 0x1e07 // LATIN CAPITAL LETTER B WITH LINE BELOW + case 0x1e08 => 0x1e09 // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + case 0x1e0a => 0x1e0b // LATIN CAPITAL LETTER D WITH DOT ABOVE + case 0x1e0c => 0x1e0d // LATIN CAPITAL LETTER D WITH DOT BELOW + case 0x1e0e => 0x1e0f // LATIN CAPITAL LETTER D WITH LINE BELOW + case 0x1e10 => 0x1e11 // LATIN CAPITAL LETTER D WITH CEDILLA + case 0x1e12 => 0x1e13 // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + case 0x1e14 => 0x1e15 // 
LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + case 0x1e16 => 0x1e17 // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + case 0x1e18 => 0x1e19 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW + case 0x1e1a => 0x1e1b // LATIN CAPITAL LETTER E WITH TILDE BELOW + case 0x1e1c => 0x1e1d // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + case 0x1e1e => 0x1e1f // LATIN CAPITAL LETTER F WITH DOT ABOVE + case 0x1e20 => 0x1e21 // LATIN CAPITAL LETTER G WITH MACRON + case 0x1e22 => 0x1e23 // LATIN CAPITAL LETTER H WITH DOT ABOVE + case 0x1e24 => 0x1e25 // LATIN CAPITAL LETTER H WITH DOT BELOW + case 0x1e26 => 0x1e27 // LATIN CAPITAL LETTER H WITH DIAERESIS + case 0x1e28 => 0x1e29 // LATIN CAPITAL LETTER H WITH CEDILLA + case 0x1e2a => 0x1e2b // LATIN CAPITAL LETTER H WITH BREVE BELOW + case 0x1e2c => 0x1e2d // LATIN CAPITAL LETTER I WITH TILDE BELOW + case 0x1e2e => 0x1e2f // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + case 0x1e30 => 0x1e31 // LATIN CAPITAL LETTER K WITH ACUTE + case 0x1e32 => 0x1e33 // LATIN CAPITAL LETTER K WITH DOT BELOW + case 0x1e34 => 0x1e35 // LATIN CAPITAL LETTER K WITH LINE BELOW + case 0x1e36 => 0x1e37 // LATIN CAPITAL LETTER L WITH DOT BELOW + case 0x1e38 => 0x1e39 // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + case 0x1e3a => 0x1e3b // LATIN CAPITAL LETTER L WITH LINE BELOW + case 0x1e3c => 0x1e3d // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + case 0x1e3e => 0x1e3f // LATIN CAPITAL LETTER M WITH ACUTE + case 0x1e40 => 0x1e41 // LATIN CAPITAL LETTER M WITH DOT ABOVE + case 0x1e42 => 0x1e43 // LATIN CAPITAL LETTER M WITH DOT BELOW + case 0x1e44 => 0x1e45 // LATIN CAPITAL LETTER N WITH DOT ABOVE + case 0x1e46 => 0x1e47 // LATIN CAPITAL LETTER N WITH DOT BELOW + case 0x1e48 => 0x1e49 // LATIN CAPITAL LETTER N WITH LINE BELOW + case 0x1e4a => 0x1e4b // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + case 0x1e4c => 0x1e4d // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + case 0x1e4e => 0x1e4f // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 
+ case 0x1e50 => 0x1e51 // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + case 0x1e52 => 0x1e53 // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + case 0x1e54 => 0x1e55 // LATIN CAPITAL LETTER P WITH ACUTE + case 0x1e56 => 0x1e57 // LATIN CAPITAL LETTER P WITH DOT ABOVE + case 0x1e58 => 0x1e59 // LATIN CAPITAL LETTER R WITH DOT ABOVE + case 0x1e5a => 0x1e5b // LATIN CAPITAL LETTER R WITH DOT BELOW + case 0x1e5c => 0x1e5d // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + case 0x1e5e => 0x1e5f // LATIN CAPITAL LETTER R WITH LINE BELOW + case 0x1e60 => 0x1e61 // LATIN CAPITAL LETTER S WITH DOT ABOVE + case 0x1e62 => 0x1e63 // LATIN CAPITAL LETTER S WITH DOT BELOW + case 0x1e64 => 0x1e65 // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + case 0x1e66 => 0x1e67 // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + case 0x1e68 => 0x1e69 // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + case 0x1e6a => 0x1e6b // LATIN CAPITAL LETTER T WITH DOT ABOVE + case 0x1e6c => 0x1e6d // LATIN CAPITAL LETTER T WITH DOT BELOW + case 0x1e6e => 0x1e6f // LATIN CAPITAL LETTER T WITH LINE BELOW + case 0x1e70 => 0x1e71 // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + case 0x1e72 => 0x1e73 // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + case 0x1e74 => 0x1e75 // LATIN CAPITAL LETTER U WITH TILDE BELOW + case 0x1e76 => 0x1e77 // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + case 0x1e78 => 0x1e79 // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + case 0x1e7a => 0x1e7b // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + case 0x1e7c => 0x1e7d // LATIN CAPITAL LETTER V WITH TILDE + case 0x1e7e => 0x1e7f // LATIN CAPITAL LETTER V WITH DOT BELOW + case 0x1e80 => 0x1e81 // LATIN CAPITAL LETTER W WITH GRAVE + case 0x1e82 => 0x1e83 // LATIN CAPITAL LETTER W WITH ACUTE + case 0x1e84 => 0x1e85 // LATIN CAPITAL LETTER W WITH DIAERESIS + case 0x1e86 => 0x1e87 // LATIN CAPITAL LETTER W WITH DOT ABOVE + case 0x1e88 => 0x1e89 // LATIN CAPITAL LETTER W WITH DOT BELOW + case 0x1e8a => 0x1e8b // 
LATIN CAPITAL LETTER X WITH DOT ABOVE + case 0x1e8c => 0x1e8d // LATIN CAPITAL LETTER X WITH DIAERESIS + case 0x1e8e => 0x1e8f // LATIN CAPITAL LETTER Y WITH DOT ABOVE + case 0x1e90 => 0x1e91 // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + case 0x1e92 => 0x1e93 // LATIN CAPITAL LETTER Z WITH DOT BELOW + case 0x1e94 => 0x1e95 // LATIN CAPITAL LETTER Z WITH LINE BELOW + case 0x1e9b => 0x1e61 // LATIN SMALL LETTER LONG S WITH DOT ABOVE + case 0x1ea0 => 0x1ea1 // LATIN CAPITAL LETTER A WITH DOT BELOW + case 0x1ea2 => 0x1ea3 // LATIN CAPITAL LETTER A WITH HOOK ABOVE + case 0x1ea4 => 0x1ea5 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + case 0x1ea6 => 0x1ea7 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + case 0x1ea8 => 0x1ea9 // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1eaa => 0x1eab // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + case 0x1eac => 0x1ead // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + case 0x1eae => 0x1eaf // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + case 0x1eb0 => 0x1eb1 // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + case 0x1eb2 => 0x1eb3 // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + case 0x1eb4 => 0x1eb5 // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + case 0x1eb6 => 0x1eb7 // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + case 0x1eb8 => 0x1eb9 // LATIN CAPITAL LETTER E WITH DOT BELOW + case 0x1eba => 0x1ebb // LATIN CAPITAL LETTER E WITH HOOK ABOVE + case 0x1ebc => 0x1ebd // LATIN CAPITAL LETTER E WITH TILDE + case 0x1ebe => 0x1ebf // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + case 0x1ec0 => 0x1ec1 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + case 0x1ec2 => 0x1ec3 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ec4 => 0x1ec5 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + case 0x1ec6 => 0x1ec7 // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + case 0x1ec8 => 0x1ec9 // LATIN CAPITAL LETTER I WITH HOOK ABOVE + case 0x1eca => 0x1ecb // LATIN 
CAPITAL LETTER I WITH DOT BELOW + case 0x1ecc => 0x1ecd // LATIN CAPITAL LETTER O WITH DOT BELOW + case 0x1ece => 0x1ecf // LATIN CAPITAL LETTER O WITH HOOK ABOVE + case 0x1ed0 => 0x1ed1 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + case 0x1ed2 => 0x1ed3 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + case 0x1ed4 => 0x1ed5 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + case 0x1ed6 => 0x1ed7 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + case 0x1ed8 => 0x1ed9 // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + case 0x1eda => 0x1edb // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + case 0x1edc => 0x1edd // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + case 0x1ede => 0x1edf // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + case 0x1ee0 => 0x1ee1 // LATIN CAPITAL LETTER O WITH HORN AND TILDE + case 0x1ee2 => 0x1ee3 // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + case 0x1ee4 => 0x1ee5 // LATIN CAPITAL LETTER U WITH DOT BELOW + case 0x1ee6 => 0x1ee7 // LATIN CAPITAL LETTER U WITH HOOK ABOVE + case 0x1ee8 => 0x1ee9 // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + case 0x1eea => 0x1eeb // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + case 0x1eec => 0x1eed // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + case 0x1eee => 0x1eef // LATIN CAPITAL LETTER U WITH HORN AND TILDE + case 0x1ef0 => 0x1ef1 // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + case 0x1ef2 => 0x1ef3 // LATIN CAPITAL LETTER Y WITH GRAVE + case 0x1ef4 => 0x1ef5 // LATIN CAPITAL LETTER Y WITH DOT BELOW + case 0x1ef6 => 0x1ef7 // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + case 0x1ef8 => 0x1ef9 // LATIN CAPITAL LETTER Y WITH TILDE + case 0x1efa => 0x1efb // LATIN CAPITAL LETTER MIDDLE-WELSH LL + case 0x1efc => 0x1efd // LATIN CAPITAL LETTER MIDDLE-WELSH V + case 0x1efe => 0x1eff // LATIN CAPITAL LETTER Y WITH LOOP + case 0x1f08 => 0x1f00 // GREEK CAPITAL LETTER ALPHA WITH PSILI + case 0x1f09 => 0x1f01 // GREEK CAPITAL LETTER ALPHA WITH DASIA + case 0x1f0a => 0x1f02 // 
GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + case 0x1f0b => 0x1f03 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + case 0x1f0c => 0x1f04 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + case 0x1f0d => 0x1f05 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + case 0x1f0e => 0x1f06 // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + case 0x1f0f => 0x1f07 // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + case 0x1f18 => 0x1f10 // GREEK CAPITAL LETTER EPSILON WITH PSILI + case 0x1f19 => 0x1f11 // GREEK CAPITAL LETTER EPSILON WITH DASIA + case 0x1f1a => 0x1f12 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA + case 0x1f1b => 0x1f13 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + case 0x1f1c => 0x1f14 // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + case 0x1f1d => 0x1f15 // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + case 0x1f28 => 0x1f20 // GREEK CAPITAL LETTER ETA WITH PSILI + case 0x1f29 => 0x1f21 // GREEK CAPITAL LETTER ETA WITH DASIA + case 0x1f2a => 0x1f22 // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + case 0x1f2b => 0x1f23 // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + case 0x1f2c => 0x1f24 // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + case 0x1f2d => 0x1f25 // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + case 0x1f2e => 0x1f26 // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + case 0x1f2f => 0x1f27 // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + case 0x1f38 => 0x1f30 // GREEK CAPITAL LETTER IOTA WITH PSILI + case 0x1f39 => 0x1f31 // GREEK CAPITAL LETTER IOTA WITH DASIA + case 0x1f3a => 0x1f32 // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + case 0x1f3b => 0x1f33 // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + case 0x1f3c => 0x1f34 // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + case 0x1f3d => 0x1f35 // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + case 0x1f3e => 0x1f36 // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + case 0x1f3f => 0x1f37 // GREEK CAPITAL LETTER 
IOTA WITH DASIA AND PERISPOMENI + case 0x1f48 => 0x1f40 // GREEK CAPITAL LETTER OMICRON WITH PSILI + case 0x1f49 => 0x1f41 // GREEK CAPITAL LETTER OMICRON WITH DASIA + case 0x1f4a => 0x1f42 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + case 0x1f4b => 0x1f43 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + case 0x1f4c => 0x1f44 // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + case 0x1f4d => 0x1f45 // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + case 0x1f59 => 0x1f51 // GREEK CAPITAL LETTER UPSILON WITH DASIA + case 0x1f5b => 0x1f53 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + case 0x1f5d => 0x1f55 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + case 0x1f5f => 0x1f57 // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + case 0x1f68 => 0x1f60 // GREEK CAPITAL LETTER OMEGA WITH PSILI + case 0x1f69 => 0x1f61 // GREEK CAPITAL LETTER OMEGA WITH DASIA + case 0x1f6a => 0x1f62 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + case 0x1f6b => 0x1f63 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + case 0x1f6c => 0x1f64 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + case 0x1f6d => 0x1f65 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + case 0x1f6e => 0x1f66 // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + case 0x1f6f => 0x1f67 // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + case 0x1fb8 => 0x1fb0 // GREEK CAPITAL LETTER ALPHA WITH VRACHY + case 0x1fb9 => 0x1fb1 // GREEK CAPITAL LETTER ALPHA WITH MACRON + case 0x1fba => 0x1f70 // GREEK CAPITAL LETTER ALPHA WITH VARIA + case 0x1fbb => 0x1f71 // GREEK CAPITAL LETTER ALPHA WITH OXIA + case 0x1fbe => 0x03b9 // GREEK PROSGEGRAMMENI + case 0x1fc8 => 0x1f72 // GREEK CAPITAL LETTER EPSILON WITH VARIA + case 0x1fc9 => 0x1f73 // GREEK CAPITAL LETTER EPSILON WITH OXIA + case 0x1fca => 0x1f74 // GREEK CAPITAL LETTER ETA WITH VARIA + case 0x1fcb => 0x1f75 // GREEK CAPITAL LETTER ETA WITH OXIA + case 0x1fd8 => 0x1fd0 // GREEK CAPITAL LETTER IOTA WITH VRACHY + 
case 0x1fd9 => 0x1fd1 // GREEK CAPITAL LETTER IOTA WITH MACRON + case 0x1fda => 0x1f76 // GREEK CAPITAL LETTER IOTA WITH VARIA + case 0x1fdb => 0x1f77 // GREEK CAPITAL LETTER IOTA WITH OXIA + case 0x1fe8 => 0x1fe0 // GREEK CAPITAL LETTER UPSILON WITH VRACHY + case 0x1fe9 => 0x1fe1 // GREEK CAPITAL LETTER UPSILON WITH MACRON + case 0x1fea => 0x1f7a // GREEK CAPITAL LETTER UPSILON WITH VARIA + case 0x1feb => 0x1f7b // GREEK CAPITAL LETTER UPSILON WITH OXIA + case 0x1fec => 0x1fe5 // GREEK CAPITAL LETTER RHO WITH DASIA + case 0x1ff8 => 0x1f78 // GREEK CAPITAL LETTER OMICRON WITH VARIA + case 0x1ff9 => 0x1f79 // GREEK CAPITAL LETTER OMICRON WITH OXIA + case 0x1ffa => 0x1f7c // GREEK CAPITAL LETTER OMEGA WITH VARIA + case 0x1ffb => 0x1f7d // GREEK CAPITAL LETTER OMEGA WITH OXIA + case 0x2126 => 0x03c9 // OHM SIGN + case 0x212a => 0x006b // KELVIN SIGN + case 0x212b => 0x00e5 // ANGSTROM SIGN + case 0x2132 => 0x214e // TURNED CAPITAL F case 0x2160 => 0x2170 // ROMAN NUMERAL ONE case 0x2161 => 0x2171 // ROMAN NUMERAL TWO case 0x2162 => 0x2172 // ROMAN NUMERAL THREE @@ -1002,644 +1176,644 @@ private[ci] object CaseFolding { case 0x2167 => 0x2177 // ROMAN NUMERAL EIGHT case 0x2168 => 0x2178 // ROMAN NUMERAL NINE case 0x2169 => 0x2179 // ROMAN NUMERAL TEN - case 0x216A => 0x217A // ROMAN NUMERAL ELEVEN - case 0x216B => 0x217B // ROMAN NUMERAL TWELVE - case 0x216C => 0x217C // ROMAN NUMERAL FIFTY - case 0x216D => 0x217D // ROMAN NUMERAL ONE HUNDRED - case 0x216E => 0x217E // ROMAN NUMERAL FIVE HUNDRED - case 0x216F => 0x217F // ROMAN NUMERAL ONE THOUSAND + case 0x216a => 0x217a // ROMAN NUMERAL ELEVEN + case 0x216b => 0x217b // ROMAN NUMERAL TWELVE + case 0x216c => 0x217c // ROMAN NUMERAL FIFTY + case 0x216d => 0x217d // ROMAN NUMERAL ONE HUNDRED + case 0x216e => 0x217e // ROMAN NUMERAL FIVE HUNDRED + case 0x216f => 0x217f // ROMAN NUMERAL ONE THOUSAND case 0x2183 => 0x2184 // ROMAN NUMERAL REVERSED ONE HUNDRED - case 0x24B6 => 0x24D0 // CIRCLED LATIN CAPITAL LETTER A - case 
0x24B7 => 0x24D1 // CIRCLED LATIN CAPITAL LETTER B - case 0x24B8 => 0x24D2 // CIRCLED LATIN CAPITAL LETTER C - case 0x24B9 => 0x24D3 // CIRCLED LATIN CAPITAL LETTER D - case 0x24BA => 0x24D4 // CIRCLED LATIN CAPITAL LETTER E - case 0x24BB => 0x24D5 // CIRCLED LATIN CAPITAL LETTER F - case 0x24BC => 0x24D6 // CIRCLED LATIN CAPITAL LETTER G - case 0x24BD => 0x24D7 // CIRCLED LATIN CAPITAL LETTER H - case 0x24BE => 0x24D8 // CIRCLED LATIN CAPITAL LETTER I - case 0x24BF => 0x24D9 // CIRCLED LATIN CAPITAL LETTER J - case 0x24C0 => 0x24DA // CIRCLED LATIN CAPITAL LETTER K - case 0x24C1 => 0x24DB // CIRCLED LATIN CAPITAL LETTER L - case 0x24C2 => 0x24DC // CIRCLED LATIN CAPITAL LETTER M - case 0x24C3 => 0x24DD // CIRCLED LATIN CAPITAL LETTER N - case 0x24C4 => 0x24DE // CIRCLED LATIN CAPITAL LETTER O - case 0x24C5 => 0x24DF // CIRCLED LATIN CAPITAL LETTER P - case 0x24C6 => 0x24E0 // CIRCLED LATIN CAPITAL LETTER Q - case 0x24C7 => 0x24E1 // CIRCLED LATIN CAPITAL LETTER R - case 0x24C8 => 0x24E2 // CIRCLED LATIN CAPITAL LETTER S - case 0x24C9 => 0x24E3 // CIRCLED LATIN CAPITAL LETTER T - case 0x24CA => 0x24E4 // CIRCLED LATIN CAPITAL LETTER U - case 0x24CB => 0x24E5 // CIRCLED LATIN CAPITAL LETTER V - case 0x24CC => 0x24E6 // CIRCLED LATIN CAPITAL LETTER W - case 0x24CD => 0x24E7 // CIRCLED LATIN CAPITAL LETTER X - case 0x24CE => 0x24E8 // CIRCLED LATIN CAPITAL LETTER Y - case 0x24CF => 0x24E9 // CIRCLED LATIN CAPITAL LETTER Z - case 0x2C00 => 0x2C30 // GLAGOLITIC CAPITAL LETTER AZU - case 0x2C01 => 0x2C31 // GLAGOLITIC CAPITAL LETTER BUKY - case 0x2C02 => 0x2C32 // GLAGOLITIC CAPITAL LETTER VEDE - case 0x2C03 => 0x2C33 // GLAGOLITIC CAPITAL LETTER GLAGOLI - case 0x2C04 => 0x2C34 // GLAGOLITIC CAPITAL LETTER DOBRO - case 0x2C05 => 0x2C35 // GLAGOLITIC CAPITAL LETTER YESTU - case 0x2C06 => 0x2C36 // GLAGOLITIC CAPITAL LETTER ZHIVETE - case 0x2C07 => 0x2C37 // GLAGOLITIC CAPITAL LETTER DZELO - case 0x2C08 => 0x2C38 // GLAGOLITIC CAPITAL LETTER ZEMLJA - case 0x2C09 => 0x2C39 
// GLAGOLITIC CAPITAL LETTER IZHE - case 0x2C0A => 0x2C3A // GLAGOLITIC CAPITAL LETTER INITIAL IZHE - case 0x2C0B => 0x2C3B // GLAGOLITIC CAPITAL LETTER I - case 0x2C0C => 0x2C3C // GLAGOLITIC CAPITAL LETTER DJERVI - case 0x2C0D => 0x2C3D // GLAGOLITIC CAPITAL LETTER KAKO - case 0x2C0E => 0x2C3E // GLAGOLITIC CAPITAL LETTER LJUDIJE - case 0x2C0F => 0x2C3F // GLAGOLITIC CAPITAL LETTER MYSLITE - case 0x2C10 => 0x2C40 // GLAGOLITIC CAPITAL LETTER NASHI - case 0x2C11 => 0x2C41 // GLAGOLITIC CAPITAL LETTER ONU - case 0x2C12 => 0x2C42 // GLAGOLITIC CAPITAL LETTER POKOJI - case 0x2C13 => 0x2C43 // GLAGOLITIC CAPITAL LETTER RITSI - case 0x2C14 => 0x2C44 // GLAGOLITIC CAPITAL LETTER SLOVO - case 0x2C15 => 0x2C45 // GLAGOLITIC CAPITAL LETTER TVRIDO - case 0x2C16 => 0x2C46 // GLAGOLITIC CAPITAL LETTER UKU - case 0x2C17 => 0x2C47 // GLAGOLITIC CAPITAL LETTER FRITU - case 0x2C18 => 0x2C48 // GLAGOLITIC CAPITAL LETTER HERU - case 0x2C19 => 0x2C49 // GLAGOLITIC CAPITAL LETTER OTU - case 0x2C1A => 0x2C4A // GLAGOLITIC CAPITAL LETTER PE - case 0x2C1B => 0x2C4B // GLAGOLITIC CAPITAL LETTER SHTA - case 0x2C1C => 0x2C4C // GLAGOLITIC CAPITAL LETTER TSI - case 0x2C1D => 0x2C4D // GLAGOLITIC CAPITAL LETTER CHRIVI - case 0x2C1E => 0x2C4E // GLAGOLITIC CAPITAL LETTER SHA - case 0x2C1F => 0x2C4F // GLAGOLITIC CAPITAL LETTER YERU - case 0x2C20 => 0x2C50 // GLAGOLITIC CAPITAL LETTER YERI - case 0x2C21 => 0x2C51 // GLAGOLITIC CAPITAL LETTER YATI - case 0x2C22 => 0x2C52 // GLAGOLITIC CAPITAL LETTER SPIDERY HA - case 0x2C23 => 0x2C53 // GLAGOLITIC CAPITAL LETTER YU - case 0x2C24 => 0x2C54 // GLAGOLITIC CAPITAL LETTER SMALL YUS - case 0x2C25 => 0x2C55 // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL - case 0x2C26 => 0x2C56 // GLAGOLITIC CAPITAL LETTER YO - case 0x2C27 => 0x2C57 // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS - case 0x2C28 => 0x2C58 // GLAGOLITIC CAPITAL LETTER BIG YUS - case 0x2C29 => 0x2C59 // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS - case 0x2C2A => 0x2C5A // GLAGOLITIC 
CAPITAL LETTER FITA - case 0x2C2B => 0x2C5B // GLAGOLITIC CAPITAL LETTER IZHITSA - case 0x2C2C => 0x2C5C // GLAGOLITIC CAPITAL LETTER SHTAPIC - case 0x2C2D => 0x2C5D // GLAGOLITIC CAPITAL LETTER TROKUTASTI A - case 0x2C2E => 0x2C5E // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE - case 0x2C2F => 0x2C5F // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI - case 0x2C60 => 0x2C61 // LATIN CAPITAL LETTER L WITH DOUBLE BAR - case 0x2C62 => 0x026B // LATIN CAPITAL LETTER L WITH MIDDLE TILDE - case 0x2C63 => 0x1D7D // LATIN CAPITAL LETTER P WITH STROKE - case 0x2C64 => 0x027D // LATIN CAPITAL LETTER R WITH TAIL - case 0x2C67 => 0x2C68 // LATIN CAPITAL LETTER H WITH DESCENDER - case 0x2C69 => 0x2C6A // LATIN CAPITAL LETTER K WITH DESCENDER - case 0x2C6B => 0x2C6C // LATIN CAPITAL LETTER Z WITH DESCENDER - case 0x2C6D => 0x0251 // LATIN CAPITAL LETTER ALPHA - case 0x2C6E => 0x0271 // LATIN CAPITAL LETTER M WITH HOOK - case 0x2C6F => 0x0250 // LATIN CAPITAL LETTER TURNED A - case 0x2C70 => 0x0252 // LATIN CAPITAL LETTER TURNED ALPHA - case 0x2C72 => 0x2C73 // LATIN CAPITAL LETTER W WITH HOOK - case 0x2C75 => 0x2C76 // LATIN CAPITAL LETTER HALF H - case 0x2C7E => 0x023F // LATIN CAPITAL LETTER S WITH SWASH TAIL - case 0x2C7F => 0x0240 // LATIN CAPITAL LETTER Z WITH SWASH TAIL - case 0x2C80 => 0x2C81 // COPTIC CAPITAL LETTER ALFA - case 0x2C82 => 0x2C83 // COPTIC CAPITAL LETTER VIDA - case 0x2C84 => 0x2C85 // COPTIC CAPITAL LETTER GAMMA - case 0x2C86 => 0x2C87 // COPTIC CAPITAL LETTER DALDA - case 0x2C88 => 0x2C89 // COPTIC CAPITAL LETTER EIE - case 0x2C8A => 0x2C8B // COPTIC CAPITAL LETTER SOU - case 0x2C8C => 0x2C8D // COPTIC CAPITAL LETTER ZATA - case 0x2C8E => 0x2C8F // COPTIC CAPITAL LETTER HATE - case 0x2C90 => 0x2C91 // COPTIC CAPITAL LETTER THETHE - case 0x2C92 => 0x2C93 // COPTIC CAPITAL LETTER IAUDA - case 0x2C94 => 0x2C95 // COPTIC CAPITAL LETTER KAPA - case 0x2C96 => 0x2C97 // COPTIC CAPITAL LETTER LAULA - case 0x2C98 => 0x2C99 // COPTIC CAPITAL LETTER MI - case 0x2C9A => 
0x2C9B // COPTIC CAPITAL LETTER NI - case 0x2C9C => 0x2C9D // COPTIC CAPITAL LETTER KSI - case 0x2C9E => 0x2C9F // COPTIC CAPITAL LETTER O - case 0x2CA0 => 0x2CA1 // COPTIC CAPITAL LETTER PI - case 0x2CA2 => 0x2CA3 // COPTIC CAPITAL LETTER RO - case 0x2CA4 => 0x2CA5 // COPTIC CAPITAL LETTER SIMA - case 0x2CA6 => 0x2CA7 // COPTIC CAPITAL LETTER TAU - case 0x2CA8 => 0x2CA9 // COPTIC CAPITAL LETTER UA - case 0x2CAA => 0x2CAB // COPTIC CAPITAL LETTER FI - case 0x2CAC => 0x2CAD // COPTIC CAPITAL LETTER KHI - case 0x2CAE => 0x2CAF // COPTIC CAPITAL LETTER PSI - case 0x2CB0 => 0x2CB1 // COPTIC CAPITAL LETTER OOU - case 0x2CB2 => 0x2CB3 // COPTIC CAPITAL LETTER DIALECT-P ALEF - case 0x2CB4 => 0x2CB5 // COPTIC CAPITAL LETTER OLD COPTIC AIN - case 0x2CB6 => 0x2CB7 // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE - case 0x2CB8 => 0x2CB9 // COPTIC CAPITAL LETTER DIALECT-P KAPA - case 0x2CBA => 0x2CBB // COPTIC CAPITAL LETTER DIALECT-P NI - case 0x2CBC => 0x2CBD // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI - case 0x2CBE => 0x2CBF // COPTIC CAPITAL LETTER OLD COPTIC OOU - case 0x2CC0 => 0x2CC1 // COPTIC CAPITAL LETTER SAMPI - case 0x2CC2 => 0x2CC3 // COPTIC CAPITAL LETTER CROSSED SHEI - case 0x2CC4 => 0x2CC5 // COPTIC CAPITAL LETTER OLD COPTIC SHEI - case 0x2CC6 => 0x2CC7 // COPTIC CAPITAL LETTER OLD COPTIC ESH - case 0x2CC8 => 0x2CC9 // COPTIC CAPITAL LETTER AKHMIMIC KHEI - case 0x2CCA => 0x2CCB // COPTIC CAPITAL LETTER DIALECT-P HORI - case 0x2CCC => 0x2CCD // COPTIC CAPITAL LETTER OLD COPTIC HORI - case 0x2CCE => 0x2CCF // COPTIC CAPITAL LETTER OLD COPTIC HA - case 0x2CD0 => 0x2CD1 // COPTIC CAPITAL LETTER L-SHAPED HA - case 0x2CD2 => 0x2CD3 // COPTIC CAPITAL LETTER OLD COPTIC HEI - case 0x2CD4 => 0x2CD5 // COPTIC CAPITAL LETTER OLD COPTIC HAT - case 0x2CD6 => 0x2CD7 // COPTIC CAPITAL LETTER OLD COPTIC GANGIA - case 0x2CD8 => 0x2CD9 // COPTIC CAPITAL LETTER OLD COPTIC DJA - case 0x2CDA => 0x2CDB // COPTIC CAPITAL LETTER OLD COPTIC SHIMA - case 0x2CDC => 0x2CDD // COPTIC CAPITAL LETTER 
OLD NUBIAN SHIMA - case 0x2CDE => 0x2CDF // COPTIC CAPITAL LETTER OLD NUBIAN NGI - case 0x2CE0 => 0x2CE1 // COPTIC CAPITAL LETTER OLD NUBIAN NYI - case 0x2CE2 => 0x2CE3 // COPTIC CAPITAL LETTER OLD NUBIAN WAU - case 0x2CEB => 0x2CEC // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI - case 0x2CED => 0x2CEE // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA - case 0x2CF2 => 0x2CF3 // COPTIC CAPITAL LETTER BOHAIRIC KHEI - case 0xA640 => 0xA641 // CYRILLIC CAPITAL LETTER ZEMLYA - case 0xA642 => 0xA643 // CYRILLIC CAPITAL LETTER DZELO - case 0xA644 => 0xA645 // CYRILLIC CAPITAL LETTER REVERSED DZE - case 0xA646 => 0xA647 // CYRILLIC CAPITAL LETTER IOTA - case 0xA648 => 0xA649 // CYRILLIC CAPITAL LETTER DJERV - case 0xA64A => 0xA64B // CYRILLIC CAPITAL LETTER MONOGRAPH UK - case 0xA64C => 0xA64D // CYRILLIC CAPITAL LETTER BROAD OMEGA - case 0xA64E => 0xA64F // CYRILLIC CAPITAL LETTER NEUTRAL YER - case 0xA650 => 0xA651 // CYRILLIC CAPITAL LETTER YERU WITH BACK YER - case 0xA652 => 0xA653 // CYRILLIC CAPITAL LETTER IOTIFIED YAT - case 0xA654 => 0xA655 // CYRILLIC CAPITAL LETTER REVERSED YU - case 0xA656 => 0xA657 // CYRILLIC CAPITAL LETTER IOTIFIED A - case 0xA658 => 0xA659 // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS - case 0xA65A => 0xA65B // CYRILLIC CAPITAL LETTER BLENDED YUS - case 0xA65C => 0xA65D // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS - case 0xA65E => 0xA65F // CYRILLIC CAPITAL LETTER YN - case 0xA660 => 0xA661 // CYRILLIC CAPITAL LETTER REVERSED TSE - case 0xA662 => 0xA663 // CYRILLIC CAPITAL LETTER SOFT DE - case 0xA664 => 0xA665 // CYRILLIC CAPITAL LETTER SOFT EL - case 0xA666 => 0xA667 // CYRILLIC CAPITAL LETTER SOFT EM - case 0xA668 => 0xA669 // CYRILLIC CAPITAL LETTER MONOCULAR O - case 0xA66A => 0xA66B // CYRILLIC CAPITAL LETTER BINOCULAR O - case 0xA66C => 0xA66D // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O - case 0xA680 => 0xA681 // CYRILLIC CAPITAL LETTER DWE - case 0xA682 => 0xA683 // CYRILLIC CAPITAL LETTER DZWE - case 0xA684 => 0xA685 // CYRILLIC 
CAPITAL LETTER ZHWE - case 0xA686 => 0xA687 // CYRILLIC CAPITAL LETTER CCHE - case 0xA688 => 0xA689 // CYRILLIC CAPITAL LETTER DZZE - case 0xA68A => 0xA68B // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK - case 0xA68C => 0xA68D // CYRILLIC CAPITAL LETTER TWE - case 0xA68E => 0xA68F // CYRILLIC CAPITAL LETTER TSWE - case 0xA690 => 0xA691 // CYRILLIC CAPITAL LETTER TSSE - case 0xA692 => 0xA693 // CYRILLIC CAPITAL LETTER TCHE - case 0xA694 => 0xA695 // CYRILLIC CAPITAL LETTER HWE - case 0xA696 => 0xA697 // CYRILLIC CAPITAL LETTER SHWE - case 0xA698 => 0xA699 // CYRILLIC CAPITAL LETTER DOUBLE O - case 0xA69A => 0xA69B // CYRILLIC CAPITAL LETTER CROSSED O - case 0xA722 => 0xA723 // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF - case 0xA724 => 0xA725 // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN - case 0xA726 => 0xA727 // LATIN CAPITAL LETTER HENG - case 0xA728 => 0xA729 // LATIN CAPITAL LETTER TZ - case 0xA72A => 0xA72B // LATIN CAPITAL LETTER TRESILLO - case 0xA72C => 0xA72D // LATIN CAPITAL LETTER CUATRILLO - case 0xA72E => 0xA72F // LATIN CAPITAL LETTER CUATRILLO WITH COMMA - case 0xA732 => 0xA733 // LATIN CAPITAL LETTER AA - case 0xA734 => 0xA735 // LATIN CAPITAL LETTER AO - case 0xA736 => 0xA737 // LATIN CAPITAL LETTER AU - case 0xA738 => 0xA739 // LATIN CAPITAL LETTER AV - case 0xA73A => 0xA73B // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR - case 0xA73C => 0xA73D // LATIN CAPITAL LETTER AY - case 0xA73E => 0xA73F // LATIN CAPITAL LETTER REVERSED C WITH DOT - case 0xA740 => 0xA741 // LATIN CAPITAL LETTER K WITH STROKE - case 0xA742 => 0xA743 // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE - case 0xA744 => 0xA745 // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE - case 0xA746 => 0xA747 // LATIN CAPITAL LETTER BROKEN L - case 0xA748 => 0xA749 // LATIN CAPITAL LETTER L WITH HIGH STROKE - case 0xA74A => 0xA74B // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY - case 0xA74C => 0xA74D // LATIN CAPITAL LETTER O WITH LOOP - case 0xA74E => 0xA74F // LATIN CAPITAL LETTER OO - 
case 0xA750 => 0xA751 // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER - case 0xA752 => 0xA753 // LATIN CAPITAL LETTER P WITH FLOURISH - case 0xA754 => 0xA755 // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL - case 0xA756 => 0xA757 // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER - case 0xA758 => 0xA759 // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE - case 0xA75A => 0xA75B // LATIN CAPITAL LETTER R ROTUNDA - case 0xA75C => 0xA75D // LATIN CAPITAL LETTER RUM ROTUNDA - case 0xA75E => 0xA75F // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE - case 0xA760 => 0xA761 // LATIN CAPITAL LETTER VY - case 0xA762 => 0xA763 // LATIN CAPITAL LETTER VISIGOTHIC Z - case 0xA764 => 0xA765 // LATIN CAPITAL LETTER THORN WITH STROKE - case 0xA766 => 0xA767 // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER - case 0xA768 => 0xA769 // LATIN CAPITAL LETTER VEND - case 0xA76A => 0xA76B // LATIN CAPITAL LETTER ET - case 0xA76C => 0xA76D // LATIN CAPITAL LETTER IS - case 0xA76E => 0xA76F // LATIN CAPITAL LETTER CON - case 0xA779 => 0xA77A // LATIN CAPITAL LETTER INSULAR D - case 0xA77B => 0xA77C // LATIN CAPITAL LETTER INSULAR F - case 0xA77D => 0x1D79 // LATIN CAPITAL LETTER INSULAR G - case 0xA77E => 0xA77F // LATIN CAPITAL LETTER TURNED INSULAR G - case 0xA780 => 0xA781 // LATIN CAPITAL LETTER TURNED L - case 0xA782 => 0xA783 // LATIN CAPITAL LETTER INSULAR R - case 0xA784 => 0xA785 // LATIN CAPITAL LETTER INSULAR S - case 0xA786 => 0xA787 // LATIN CAPITAL LETTER INSULAR T - case 0xA78B => 0xA78C // LATIN CAPITAL LETTER SALTILLO - case 0xA78D => 0x0265 // LATIN CAPITAL LETTER TURNED H - case 0xA790 => 0xA791 // LATIN CAPITAL LETTER N WITH DESCENDER - case 0xA792 => 0xA793 // LATIN CAPITAL LETTER C WITH BAR - case 0xA796 => 0xA797 // LATIN CAPITAL LETTER B WITH FLOURISH - case 0xA798 => 0xA799 // LATIN CAPITAL LETTER F WITH STROKE - case 0xA79A => 0xA79B // LATIN CAPITAL LETTER VOLAPUK AE - case 0xA79C => 0xA79D // LATIN CAPITAL LETTER VOLAPUK OE - case 0xA79E => 0xA79F // 
LATIN CAPITAL LETTER VOLAPUK UE - case 0xA7A0 => 0xA7A1 // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE - case 0xA7A2 => 0xA7A3 // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE - case 0xA7A4 => 0xA7A5 // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE - case 0xA7A6 => 0xA7A7 // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE - case 0xA7A8 => 0xA7A9 // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE - case 0xA7AA => 0x0266 // LATIN CAPITAL LETTER H WITH HOOK - case 0xA7AB => 0x025C // LATIN CAPITAL LETTER REVERSED OPEN E - case 0xA7AC => 0x0261 // LATIN CAPITAL LETTER SCRIPT G - case 0xA7AD => 0x026C // LATIN CAPITAL LETTER L WITH BELT - case 0xA7AE => 0x026A // LATIN CAPITAL LETTER SMALL CAPITAL I - case 0xA7B0 => 0x029E // LATIN CAPITAL LETTER TURNED K - case 0xA7B1 => 0x0287 // LATIN CAPITAL LETTER TURNED T - case 0xA7B2 => 0x029D // LATIN CAPITAL LETTER J WITH CROSSED-TAIL - case 0xA7B3 => 0xAB53 // LATIN CAPITAL LETTER CHI - case 0xA7B4 => 0xA7B5 // LATIN CAPITAL LETTER BETA - case 0xA7B6 => 0xA7B7 // LATIN CAPITAL LETTER OMEGA - case 0xA7B8 => 0xA7B9 // LATIN CAPITAL LETTER U WITH STROKE - case 0xA7BA => 0xA7BB // LATIN CAPITAL LETTER GLOTTAL A - case 0xA7BC => 0xA7BD // LATIN CAPITAL LETTER GLOTTAL I - case 0xA7BE => 0xA7BF // LATIN CAPITAL LETTER GLOTTAL U - case 0xA7C0 => 0xA7C1 // LATIN CAPITAL LETTER OLD POLISH O - case 0xA7C2 => 0xA7C3 // LATIN CAPITAL LETTER ANGLICANA W - case 0xA7C4 => 0xA794 // LATIN CAPITAL LETTER C WITH PALATAL HOOK - case 0xA7C5 => 0x0282 // LATIN CAPITAL LETTER S WITH HOOK - case 0xA7C6 => 0x1D8E // LATIN CAPITAL LETTER Z WITH PALATAL HOOK - case 0xA7C7 => 0xA7C8 // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY - case 0xA7C9 => 0xA7CA // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY - case 0xA7D0 => 0xA7D1 // LATIN CAPITAL LETTER CLOSED INSULAR G - case 0xA7D6 => 0xA7D7 // LATIN CAPITAL LETTER MIDDLE SCOTS S - case 0xA7D8 => 0xA7D9 // LATIN CAPITAL LETTER SIGMOID S - case 0xA7F5 => 0xA7F6 // LATIN CAPITAL LETTER REVERSED HALF H - case 0xAB70 => 
0x13A0 // CHEROKEE SMALL LETTER A - case 0xAB71 => 0x13A1 // CHEROKEE SMALL LETTER E - case 0xAB72 => 0x13A2 // CHEROKEE SMALL LETTER I - case 0xAB73 => 0x13A3 // CHEROKEE SMALL LETTER O - case 0xAB74 => 0x13A4 // CHEROKEE SMALL LETTER U - case 0xAB75 => 0x13A5 // CHEROKEE SMALL LETTER V - case 0xAB76 => 0x13A6 // CHEROKEE SMALL LETTER GA - case 0xAB77 => 0x13A7 // CHEROKEE SMALL LETTER KA - case 0xAB78 => 0x13A8 // CHEROKEE SMALL LETTER GE - case 0xAB79 => 0x13A9 // CHEROKEE SMALL LETTER GI - case 0xAB7A => 0x13AA // CHEROKEE SMALL LETTER GO - case 0xAB7B => 0x13AB // CHEROKEE SMALL LETTER GU - case 0xAB7C => 0x13AC // CHEROKEE SMALL LETTER GV - case 0xAB7D => 0x13AD // CHEROKEE SMALL LETTER HA - case 0xAB7E => 0x13AE // CHEROKEE SMALL LETTER HE - case 0xAB7F => 0x13AF // CHEROKEE SMALL LETTER HI - case 0xAB80 => 0x13B0 // CHEROKEE SMALL LETTER HO - case 0xAB81 => 0x13B1 // CHEROKEE SMALL LETTER HU - case 0xAB82 => 0x13B2 // CHEROKEE SMALL LETTER HV - case 0xAB83 => 0x13B3 // CHEROKEE SMALL LETTER LA - case 0xAB84 => 0x13B4 // CHEROKEE SMALL LETTER LE - case 0xAB85 => 0x13B5 // CHEROKEE SMALL LETTER LI - case 0xAB86 => 0x13B6 // CHEROKEE SMALL LETTER LO - case 0xAB87 => 0x13B7 // CHEROKEE SMALL LETTER LU - case 0xAB88 => 0x13B8 // CHEROKEE SMALL LETTER LV - case 0xAB89 => 0x13B9 // CHEROKEE SMALL LETTER MA - case 0xAB8A => 0x13BA // CHEROKEE SMALL LETTER ME - case 0xAB8B => 0x13BB // CHEROKEE SMALL LETTER MI - case 0xAB8C => 0x13BC // CHEROKEE SMALL LETTER MO - case 0xAB8D => 0x13BD // CHEROKEE SMALL LETTER MU - case 0xAB8E => 0x13BE // CHEROKEE SMALL LETTER NA - case 0xAB8F => 0x13BF // CHEROKEE SMALL LETTER HNA - case 0xAB90 => 0x13C0 // CHEROKEE SMALL LETTER NAH - case 0xAB91 => 0x13C1 // CHEROKEE SMALL LETTER NE - case 0xAB92 => 0x13C2 // CHEROKEE SMALL LETTER NI - case 0xAB93 => 0x13C3 // CHEROKEE SMALL LETTER NO - case 0xAB94 => 0x13C4 // CHEROKEE SMALL LETTER NU - case 0xAB95 => 0x13C5 // CHEROKEE SMALL LETTER NV - case 0xAB96 => 0x13C6 // CHEROKEE SMALL 
LETTER QUA - case 0xAB97 => 0x13C7 // CHEROKEE SMALL LETTER QUE - case 0xAB98 => 0x13C8 // CHEROKEE SMALL LETTER QUI - case 0xAB99 => 0x13C9 // CHEROKEE SMALL LETTER QUO - case 0xAB9A => 0x13CA // CHEROKEE SMALL LETTER QUU - case 0xAB9B => 0x13CB // CHEROKEE SMALL LETTER QUV - case 0xAB9C => 0x13CC // CHEROKEE SMALL LETTER SA - case 0xAB9D => 0x13CD // CHEROKEE SMALL LETTER S - case 0xAB9E => 0x13CE // CHEROKEE SMALL LETTER SE - case 0xAB9F => 0x13CF // CHEROKEE SMALL LETTER SI - case 0xABA0 => 0x13D0 // CHEROKEE SMALL LETTER SO - case 0xABA1 => 0x13D1 // CHEROKEE SMALL LETTER SU - case 0xABA2 => 0x13D2 // CHEROKEE SMALL LETTER SV - case 0xABA3 => 0x13D3 // CHEROKEE SMALL LETTER DA - case 0xABA4 => 0x13D4 // CHEROKEE SMALL LETTER TA - case 0xABA5 => 0x13D5 // CHEROKEE SMALL LETTER DE - case 0xABA6 => 0x13D6 // CHEROKEE SMALL LETTER TE - case 0xABA7 => 0x13D7 // CHEROKEE SMALL LETTER DI - case 0xABA8 => 0x13D8 // CHEROKEE SMALL LETTER TI - case 0xABA9 => 0x13D9 // CHEROKEE SMALL LETTER DO - case 0xABAA => 0x13DA // CHEROKEE SMALL LETTER DU - case 0xABAB => 0x13DB // CHEROKEE SMALL LETTER DV - case 0xABAC => 0x13DC // CHEROKEE SMALL LETTER DLA - case 0xABAD => 0x13DD // CHEROKEE SMALL LETTER TLA - case 0xABAE => 0x13DE // CHEROKEE SMALL LETTER TLE - case 0xABAF => 0x13DF // CHEROKEE SMALL LETTER TLI - case 0xABB0 => 0x13E0 // CHEROKEE SMALL LETTER TLO - case 0xABB1 => 0x13E1 // CHEROKEE SMALL LETTER TLU - case 0xABB2 => 0x13E2 // CHEROKEE SMALL LETTER TLV - case 0xABB3 => 0x13E3 // CHEROKEE SMALL LETTER TSA - case 0xABB4 => 0x13E4 // CHEROKEE SMALL LETTER TSE - case 0xABB5 => 0x13E5 // CHEROKEE SMALL LETTER TSI - case 0xABB6 => 0x13E6 // CHEROKEE SMALL LETTER TSO - case 0xABB7 => 0x13E7 // CHEROKEE SMALL LETTER TSU - case 0xABB8 => 0x13E8 // CHEROKEE SMALL LETTER TSV - case 0xABB9 => 0x13E9 // CHEROKEE SMALL LETTER WA - case 0xABBA => 0x13EA // CHEROKEE SMALL LETTER WE - case 0xABBB => 0x13EB // CHEROKEE SMALL LETTER WI - case 0xABBC => 0x13EC // CHEROKEE SMALL 
LETTER WO - case 0xABBD => 0x13ED // CHEROKEE SMALL LETTER WU - case 0xABBE => 0x13EE // CHEROKEE SMALL LETTER WV - case 0xABBF => 0x13EF // CHEROKEE SMALL LETTER YA - case 0xFF21 => 0xFF41 // FULLWIDTH LATIN CAPITAL LETTER A - case 0xFF22 => 0xFF42 // FULLWIDTH LATIN CAPITAL LETTER B - case 0xFF23 => 0xFF43 // FULLWIDTH LATIN CAPITAL LETTER C - case 0xFF24 => 0xFF44 // FULLWIDTH LATIN CAPITAL LETTER D - case 0xFF25 => 0xFF45 // FULLWIDTH LATIN CAPITAL LETTER E - case 0xFF26 => 0xFF46 // FULLWIDTH LATIN CAPITAL LETTER F - case 0xFF27 => 0xFF47 // FULLWIDTH LATIN CAPITAL LETTER G - case 0xFF28 => 0xFF48 // FULLWIDTH LATIN CAPITAL LETTER H - case 0xFF29 => 0xFF49 // FULLWIDTH LATIN CAPITAL LETTER I - case 0xFF2A => 0xFF4A // FULLWIDTH LATIN CAPITAL LETTER J - case 0xFF2B => 0xFF4B // FULLWIDTH LATIN CAPITAL LETTER K - case 0xFF2C => 0xFF4C // FULLWIDTH LATIN CAPITAL LETTER L - case 0xFF2D => 0xFF4D // FULLWIDTH LATIN CAPITAL LETTER M - case 0xFF2E => 0xFF4E // FULLWIDTH LATIN CAPITAL LETTER N - case 0xFF2F => 0xFF4F // FULLWIDTH LATIN CAPITAL LETTER O - case 0xFF30 => 0xFF50 // FULLWIDTH LATIN CAPITAL LETTER P - case 0xFF31 => 0xFF51 // FULLWIDTH LATIN CAPITAL LETTER Q - case 0xFF32 => 0xFF52 // FULLWIDTH LATIN CAPITAL LETTER R - case 0xFF33 => 0xFF53 // FULLWIDTH LATIN CAPITAL LETTER S - case 0xFF34 => 0xFF54 // FULLWIDTH LATIN CAPITAL LETTER T - case 0xFF35 => 0xFF55 // FULLWIDTH LATIN CAPITAL LETTER U - case 0xFF36 => 0xFF56 // FULLWIDTH LATIN CAPITAL LETTER V - case 0xFF37 => 0xFF57 // FULLWIDTH LATIN CAPITAL LETTER W - case 0xFF38 => 0xFF58 // FULLWIDTH LATIN CAPITAL LETTER X - case 0xFF39 => 0xFF59 // FULLWIDTH LATIN CAPITAL LETTER Y - case 0xFF3A => 0xFF5A // FULLWIDTH LATIN CAPITAL LETTER Z + case 0x24b6 => 0x24d0 // CIRCLED LATIN CAPITAL LETTER A + case 0x24b7 => 0x24d1 // CIRCLED LATIN CAPITAL LETTER B + case 0x24b8 => 0x24d2 // CIRCLED LATIN CAPITAL LETTER C + case 0x24b9 => 0x24d3 // CIRCLED LATIN CAPITAL LETTER D + case 0x24ba => 0x24d4 // CIRCLED LATIN 
CAPITAL LETTER E + case 0x24bb => 0x24d5 // CIRCLED LATIN CAPITAL LETTER F + case 0x24bc => 0x24d6 // CIRCLED LATIN CAPITAL LETTER G + case 0x24bd => 0x24d7 // CIRCLED LATIN CAPITAL LETTER H + case 0x24be => 0x24d8 // CIRCLED LATIN CAPITAL LETTER I + case 0x24bf => 0x24d9 // CIRCLED LATIN CAPITAL LETTER J + case 0x24c0 => 0x24da // CIRCLED LATIN CAPITAL LETTER K + case 0x24c1 => 0x24db // CIRCLED LATIN CAPITAL LETTER L + case 0x24c2 => 0x24dc // CIRCLED LATIN CAPITAL LETTER M + case 0x24c3 => 0x24dd // CIRCLED LATIN CAPITAL LETTER N + case 0x24c4 => 0x24de // CIRCLED LATIN CAPITAL LETTER O + case 0x24c5 => 0x24df // CIRCLED LATIN CAPITAL LETTER P + case 0x24c6 => 0x24e0 // CIRCLED LATIN CAPITAL LETTER Q + case 0x24c7 => 0x24e1 // CIRCLED LATIN CAPITAL LETTER R + case 0x24c8 => 0x24e2 // CIRCLED LATIN CAPITAL LETTER S + case 0x24c9 => 0x24e3 // CIRCLED LATIN CAPITAL LETTER T + case 0x24ca => 0x24e4 // CIRCLED LATIN CAPITAL LETTER U + case 0x24cb => 0x24e5 // CIRCLED LATIN CAPITAL LETTER V + case 0x24cc => 0x24e6 // CIRCLED LATIN CAPITAL LETTER W + case 0x24cd => 0x24e7 // CIRCLED LATIN CAPITAL LETTER X + case 0x24ce => 0x24e8 // CIRCLED LATIN CAPITAL LETTER Y + case 0x24cf => 0x24e9 // CIRCLED LATIN CAPITAL LETTER Z + case 0x2c00 => 0x2c30 // GLAGOLITIC CAPITAL LETTER AZU + case 0x2c01 => 0x2c31 // GLAGOLITIC CAPITAL LETTER BUKY + case 0x2c02 => 0x2c32 // GLAGOLITIC CAPITAL LETTER VEDE + case 0x2c03 => 0x2c33 // GLAGOLITIC CAPITAL LETTER GLAGOLI + case 0x2c04 => 0x2c34 // GLAGOLITIC CAPITAL LETTER DOBRO + case 0x2c05 => 0x2c35 // GLAGOLITIC CAPITAL LETTER YESTU + case 0x2c06 => 0x2c36 // GLAGOLITIC CAPITAL LETTER ZHIVETE + case 0x2c07 => 0x2c37 // GLAGOLITIC CAPITAL LETTER DZELO + case 0x2c08 => 0x2c38 // GLAGOLITIC CAPITAL LETTER ZEMLJA + case 0x2c09 => 0x2c39 // GLAGOLITIC CAPITAL LETTER IZHE + case 0x2c0a => 0x2c3a // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + case 0x2c0b => 0x2c3b // GLAGOLITIC CAPITAL LETTER I + case 0x2c0c => 0x2c3c // GLAGOLITIC CAPITAL LETTER 
DJERVI + case 0x2c0d => 0x2c3d // GLAGOLITIC CAPITAL LETTER KAKO + case 0x2c0e => 0x2c3e // GLAGOLITIC CAPITAL LETTER LJUDIJE + case 0x2c0f => 0x2c3f // GLAGOLITIC CAPITAL LETTER MYSLITE + case 0x2c10 => 0x2c40 // GLAGOLITIC CAPITAL LETTER NASHI + case 0x2c11 => 0x2c41 // GLAGOLITIC CAPITAL LETTER ONU + case 0x2c12 => 0x2c42 // GLAGOLITIC CAPITAL LETTER POKOJI + case 0x2c13 => 0x2c43 // GLAGOLITIC CAPITAL LETTER RITSI + case 0x2c14 => 0x2c44 // GLAGOLITIC CAPITAL LETTER SLOVO + case 0x2c15 => 0x2c45 // GLAGOLITIC CAPITAL LETTER TVRIDO + case 0x2c16 => 0x2c46 // GLAGOLITIC CAPITAL LETTER UKU + case 0x2c17 => 0x2c47 // GLAGOLITIC CAPITAL LETTER FRITU + case 0x2c18 => 0x2c48 // GLAGOLITIC CAPITAL LETTER HERU + case 0x2c19 => 0x2c49 // GLAGOLITIC CAPITAL LETTER OTU + case 0x2c1a => 0x2c4a // GLAGOLITIC CAPITAL LETTER PE + case 0x2c1b => 0x2c4b // GLAGOLITIC CAPITAL LETTER SHTA + case 0x2c1c => 0x2c4c // GLAGOLITIC CAPITAL LETTER TSI + case 0x2c1d => 0x2c4d // GLAGOLITIC CAPITAL LETTER CHRIVI + case 0x2c1e => 0x2c4e // GLAGOLITIC CAPITAL LETTER SHA + case 0x2c1f => 0x2c4f // GLAGOLITIC CAPITAL LETTER YERU + case 0x2c20 => 0x2c50 // GLAGOLITIC CAPITAL LETTER YERI + case 0x2c21 => 0x2c51 // GLAGOLITIC CAPITAL LETTER YATI + case 0x2c22 => 0x2c52 // GLAGOLITIC CAPITAL LETTER SPIDERY HA + case 0x2c23 => 0x2c53 // GLAGOLITIC CAPITAL LETTER YU + case 0x2c24 => 0x2c54 // GLAGOLITIC CAPITAL LETTER SMALL YUS + case 0x2c25 => 0x2c55 // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + case 0x2c26 => 0x2c56 // GLAGOLITIC CAPITAL LETTER YO + case 0x2c27 => 0x2c57 // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + case 0x2c28 => 0x2c58 // GLAGOLITIC CAPITAL LETTER BIG YUS + case 0x2c29 => 0x2c59 // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + case 0x2c2a => 0x2c5a // GLAGOLITIC CAPITAL LETTER FITA + case 0x2c2b => 0x2c5b // GLAGOLITIC CAPITAL LETTER IZHITSA + case 0x2c2c => 0x2c5c // GLAGOLITIC CAPITAL LETTER SHTAPIC + case 0x2c2d => 0x2c5d // GLAGOLITIC CAPITAL LETTER TROKUTASTI A + case 
0x2c2e => 0x2c5e // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + case 0x2c2f => 0x2c5f // GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI + case 0x2c60 => 0x2c61 // LATIN CAPITAL LETTER L WITH DOUBLE BAR + case 0x2c62 => 0x026b // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + case 0x2c63 => 0x1d7d // LATIN CAPITAL LETTER P WITH STROKE + case 0x2c64 => 0x027d // LATIN CAPITAL LETTER R WITH TAIL + case 0x2c67 => 0x2c68 // LATIN CAPITAL LETTER H WITH DESCENDER + case 0x2c69 => 0x2c6a // LATIN CAPITAL LETTER K WITH DESCENDER + case 0x2c6b => 0x2c6c // LATIN CAPITAL LETTER Z WITH DESCENDER + case 0x2c6d => 0x0251 // LATIN CAPITAL LETTER ALPHA + case 0x2c6e => 0x0271 // LATIN CAPITAL LETTER M WITH HOOK + case 0x2c6f => 0x0250 // LATIN CAPITAL LETTER TURNED A + case 0x2c70 => 0x0252 // LATIN CAPITAL LETTER TURNED ALPHA + case 0x2c72 => 0x2c73 // LATIN CAPITAL LETTER W WITH HOOK + case 0x2c75 => 0x2c76 // LATIN CAPITAL LETTER HALF H + case 0x2c7e => 0x023f // LATIN CAPITAL LETTER S WITH SWASH TAIL + case 0x2c7f => 0x0240 // LATIN CAPITAL LETTER Z WITH SWASH TAIL + case 0x2c80 => 0x2c81 // COPTIC CAPITAL LETTER ALFA + case 0x2c82 => 0x2c83 // COPTIC CAPITAL LETTER VIDA + case 0x2c84 => 0x2c85 // COPTIC CAPITAL LETTER GAMMA + case 0x2c86 => 0x2c87 // COPTIC CAPITAL LETTER DALDA + case 0x2c88 => 0x2c89 // COPTIC CAPITAL LETTER EIE + case 0x2c8a => 0x2c8b // COPTIC CAPITAL LETTER SOU + case 0x2c8c => 0x2c8d // COPTIC CAPITAL LETTER ZATA + case 0x2c8e => 0x2c8f // COPTIC CAPITAL LETTER HATE + case 0x2c90 => 0x2c91 // COPTIC CAPITAL LETTER THETHE + case 0x2c92 => 0x2c93 // COPTIC CAPITAL LETTER IAUDA + case 0x2c94 => 0x2c95 // COPTIC CAPITAL LETTER KAPA + case 0x2c96 => 0x2c97 // COPTIC CAPITAL LETTER LAULA + case 0x2c98 => 0x2c99 // COPTIC CAPITAL LETTER MI + case 0x2c9a => 0x2c9b // COPTIC CAPITAL LETTER NI + case 0x2c9c => 0x2c9d // COPTIC CAPITAL LETTER KSI + case 0x2c9e => 0x2c9f // COPTIC CAPITAL LETTER O + case 0x2ca0 => 0x2ca1 // COPTIC CAPITAL LETTER PI + case 0x2ca2 => 0x2ca3 // 
COPTIC CAPITAL LETTER RO + case 0x2ca4 => 0x2ca5 // COPTIC CAPITAL LETTER SIMA + case 0x2ca6 => 0x2ca7 // COPTIC CAPITAL LETTER TAU + case 0x2ca8 => 0x2ca9 // COPTIC CAPITAL LETTER UA + case 0x2caa => 0x2cab // COPTIC CAPITAL LETTER FI + case 0x2cac => 0x2cad // COPTIC CAPITAL LETTER KHI + case 0x2cae => 0x2caf // COPTIC CAPITAL LETTER PSI + case 0x2cb0 => 0x2cb1 // COPTIC CAPITAL LETTER OOU + case 0x2cb2 => 0x2cb3 // COPTIC CAPITAL LETTER DIALECT-P ALEF + case 0x2cb4 => 0x2cb5 // COPTIC CAPITAL LETTER OLD COPTIC AIN + case 0x2cb6 => 0x2cb7 // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + case 0x2cb8 => 0x2cb9 // COPTIC CAPITAL LETTER DIALECT-P KAPA + case 0x2cba => 0x2cbb // COPTIC CAPITAL LETTER DIALECT-P NI + case 0x2cbc => 0x2cbd // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + case 0x2cbe => 0x2cbf // COPTIC CAPITAL LETTER OLD COPTIC OOU + case 0x2cc0 => 0x2cc1 // COPTIC CAPITAL LETTER SAMPI + case 0x2cc2 => 0x2cc3 // COPTIC CAPITAL LETTER CROSSED SHEI + case 0x2cc4 => 0x2cc5 // COPTIC CAPITAL LETTER OLD COPTIC SHEI + case 0x2cc6 => 0x2cc7 // COPTIC CAPITAL LETTER OLD COPTIC ESH + case 0x2cc8 => 0x2cc9 // COPTIC CAPITAL LETTER AKHMIMIC KHEI + case 0x2cca => 0x2ccb // COPTIC CAPITAL LETTER DIALECT-P HORI + case 0x2ccc => 0x2ccd // COPTIC CAPITAL LETTER OLD COPTIC HORI + case 0x2cce => 0x2ccf // COPTIC CAPITAL LETTER OLD COPTIC HA + case 0x2cd0 => 0x2cd1 // COPTIC CAPITAL LETTER L-SHAPED HA + case 0x2cd2 => 0x2cd3 // COPTIC CAPITAL LETTER OLD COPTIC HEI + case 0x2cd4 => 0x2cd5 // COPTIC CAPITAL LETTER OLD COPTIC HAT + case 0x2cd6 => 0x2cd7 // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + case 0x2cd8 => 0x2cd9 // COPTIC CAPITAL LETTER OLD COPTIC DJA + case 0x2cda => 0x2cdb // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + case 0x2cdc => 0x2cdd // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + case 0x2cde => 0x2cdf // COPTIC CAPITAL LETTER OLD NUBIAN NGI + case 0x2ce0 => 0x2ce1 // COPTIC CAPITAL LETTER OLD NUBIAN NYI + case 0x2ce2 => 0x2ce3 // COPTIC CAPITAL LETTER OLD NUBIAN WAU + case 
0x2ceb => 0x2cec // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + case 0x2ced => 0x2cee // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + case 0x2cf2 => 0x2cf3 // COPTIC CAPITAL LETTER BOHAIRIC KHEI + case 0xa640 => 0xa641 // CYRILLIC CAPITAL LETTER ZEMLYA + case 0xa642 => 0xa643 // CYRILLIC CAPITAL LETTER DZELO + case 0xa644 => 0xa645 // CYRILLIC CAPITAL LETTER REVERSED DZE + case 0xa646 => 0xa647 // CYRILLIC CAPITAL LETTER IOTA + case 0xa648 => 0xa649 // CYRILLIC CAPITAL LETTER DJERV + case 0xa64a => 0xa64b // CYRILLIC CAPITAL LETTER MONOGRAPH UK + case 0xa64c => 0xa64d // CYRILLIC CAPITAL LETTER BROAD OMEGA + case 0xa64e => 0xa64f // CYRILLIC CAPITAL LETTER NEUTRAL YER + case 0xa650 => 0xa651 // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + case 0xa652 => 0xa653 // CYRILLIC CAPITAL LETTER IOTIFIED YAT + case 0xa654 => 0xa655 // CYRILLIC CAPITAL LETTER REVERSED YU + case 0xa656 => 0xa657 // CYRILLIC CAPITAL LETTER IOTIFIED A + case 0xa658 => 0xa659 // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + case 0xa65a => 0xa65b // CYRILLIC CAPITAL LETTER BLENDED YUS + case 0xa65c => 0xa65d // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + case 0xa65e => 0xa65f // CYRILLIC CAPITAL LETTER YN + case 0xa660 => 0xa661 // CYRILLIC CAPITAL LETTER REVERSED TSE + case 0xa662 => 0xa663 // CYRILLIC CAPITAL LETTER SOFT DE + case 0xa664 => 0xa665 // CYRILLIC CAPITAL LETTER SOFT EL + case 0xa666 => 0xa667 // CYRILLIC CAPITAL LETTER SOFT EM + case 0xa668 => 0xa669 // CYRILLIC CAPITAL LETTER MONOCULAR O + case 0xa66a => 0xa66b // CYRILLIC CAPITAL LETTER BINOCULAR O + case 0xa66c => 0xa66d // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + case 0xa680 => 0xa681 // CYRILLIC CAPITAL LETTER DWE + case 0xa682 => 0xa683 // CYRILLIC CAPITAL LETTER DZWE + case 0xa684 => 0xa685 // CYRILLIC CAPITAL LETTER ZHWE + case 0xa686 => 0xa687 // CYRILLIC CAPITAL LETTER CCHE + case 0xa688 => 0xa689 // CYRILLIC CAPITAL LETTER DZZE + case 0xa68a => 0xa68b // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + case 0xa68c 
=> 0xa68d // CYRILLIC CAPITAL LETTER TWE + case 0xa68e => 0xa68f // CYRILLIC CAPITAL LETTER TSWE + case 0xa690 => 0xa691 // CYRILLIC CAPITAL LETTER TSSE + case 0xa692 => 0xa693 // CYRILLIC CAPITAL LETTER TCHE + case 0xa694 => 0xa695 // CYRILLIC CAPITAL LETTER HWE + case 0xa696 => 0xa697 // CYRILLIC CAPITAL LETTER SHWE + case 0xa698 => 0xa699 // CYRILLIC CAPITAL LETTER DOUBLE O + case 0xa69a => 0xa69b // CYRILLIC CAPITAL LETTER CROSSED O + case 0xa722 => 0xa723 // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + case 0xa724 => 0xa725 // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + case 0xa726 => 0xa727 // LATIN CAPITAL LETTER HENG + case 0xa728 => 0xa729 // LATIN CAPITAL LETTER TZ + case 0xa72a => 0xa72b // LATIN CAPITAL LETTER TRESILLO + case 0xa72c => 0xa72d // LATIN CAPITAL LETTER CUATRILLO + case 0xa72e => 0xa72f // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + case 0xa732 => 0xa733 // LATIN CAPITAL LETTER AA + case 0xa734 => 0xa735 // LATIN CAPITAL LETTER AO + case 0xa736 => 0xa737 // LATIN CAPITAL LETTER AU + case 0xa738 => 0xa739 // LATIN CAPITAL LETTER AV + case 0xa73a => 0xa73b // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + case 0xa73c => 0xa73d // LATIN CAPITAL LETTER AY + case 0xa73e => 0xa73f // LATIN CAPITAL LETTER REVERSED C WITH DOT + case 0xa740 => 0xa741 // LATIN CAPITAL LETTER K WITH STROKE + case 0xa742 => 0xa743 // LATIN CAPITAL LETTER K WITH DIAGONAL STROKE + case 0xa744 => 0xa745 // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + case 0xa746 => 0xa747 // LATIN CAPITAL LETTER BROKEN L + case 0xa748 => 0xa749 // LATIN CAPITAL LETTER L WITH HIGH STROKE + case 0xa74a => 0xa74b // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + case 0xa74c => 0xa74d // LATIN CAPITAL LETTER O WITH LOOP + case 0xa74e => 0xa74f // LATIN CAPITAL LETTER OO + case 0xa750 => 0xa751 // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + case 0xa752 => 0xa753 // LATIN CAPITAL LETTER P WITH FLOURISH + case 0xa754 => 0xa755 // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + 
case 0xa756 => 0xa757 // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + case 0xa758 => 0xa759 // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + case 0xa75a => 0xa75b // LATIN CAPITAL LETTER R ROTUNDA + case 0xa75c => 0xa75d // LATIN CAPITAL LETTER RUM ROTUNDA + case 0xa75e => 0xa75f // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + case 0xa760 => 0xa761 // LATIN CAPITAL LETTER VY + case 0xa762 => 0xa763 // LATIN CAPITAL LETTER VISIGOTHIC Z + case 0xa764 => 0xa765 // LATIN CAPITAL LETTER THORN WITH STROKE + case 0xa766 => 0xa767 // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + case 0xa768 => 0xa769 // LATIN CAPITAL LETTER VEND + case 0xa76a => 0xa76b // LATIN CAPITAL LETTER ET + case 0xa76c => 0xa76d // LATIN CAPITAL LETTER IS + case 0xa76e => 0xa76f // LATIN CAPITAL LETTER CON + case 0xa779 => 0xa77a // LATIN CAPITAL LETTER INSULAR D + case 0xa77b => 0xa77c // LATIN CAPITAL LETTER INSULAR F + case 0xa77d => 0x1d79 // LATIN CAPITAL LETTER INSULAR G + case 0xa77e => 0xa77f // LATIN CAPITAL LETTER TURNED INSULAR G + case 0xa780 => 0xa781 // LATIN CAPITAL LETTER TURNED L + case 0xa782 => 0xa783 // LATIN CAPITAL LETTER INSULAR R + case 0xa784 => 0xa785 // LATIN CAPITAL LETTER INSULAR S + case 0xa786 => 0xa787 // LATIN CAPITAL LETTER INSULAR T + case 0xa78b => 0xa78c // LATIN CAPITAL LETTER SALTILLO + case 0xa78d => 0x0265 // LATIN CAPITAL LETTER TURNED H + case 0xa790 => 0xa791 // LATIN CAPITAL LETTER N WITH DESCENDER + case 0xa792 => 0xa793 // LATIN CAPITAL LETTER C WITH BAR + case 0xa796 => 0xa797 // LATIN CAPITAL LETTER B WITH FLOURISH + case 0xa798 => 0xa799 // LATIN CAPITAL LETTER F WITH STROKE + case 0xa79a => 0xa79b // LATIN CAPITAL LETTER VOLAPUK AE + case 0xa79c => 0xa79d // LATIN CAPITAL LETTER VOLAPUK OE + case 0xa79e => 0xa79f // LATIN CAPITAL LETTER VOLAPUK UE + case 0xa7a0 => 0xa7a1 // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE + case 0xa7a2 => 0xa7a3 // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE + case 0xa7a4 => 0xa7a5 // LATIN CAPITAL 
LETTER N WITH OBLIQUE STROKE + case 0xa7a6 => 0xa7a7 // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE + case 0xa7a8 => 0xa7a9 // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE + case 0xa7aa => 0x0266 // LATIN CAPITAL LETTER H WITH HOOK + case 0xa7ab => 0x025c // LATIN CAPITAL LETTER REVERSED OPEN E + case 0xa7ac => 0x0261 // LATIN CAPITAL LETTER SCRIPT G + case 0xa7ad => 0x026c // LATIN CAPITAL LETTER L WITH BELT + case 0xa7ae => 0x026a // LATIN CAPITAL LETTER SMALL CAPITAL I + case 0xa7b0 => 0x029e // LATIN CAPITAL LETTER TURNED K + case 0xa7b1 => 0x0287 // LATIN CAPITAL LETTER TURNED T + case 0xa7b2 => 0x029d // LATIN CAPITAL LETTER J WITH CROSSED-TAIL + case 0xa7b3 => 0xab53 // LATIN CAPITAL LETTER CHI + case 0xa7b4 => 0xa7b5 // LATIN CAPITAL LETTER BETA + case 0xa7b6 => 0xa7b7 // LATIN CAPITAL LETTER OMEGA + case 0xa7b8 => 0xa7b9 // LATIN CAPITAL LETTER U WITH STROKE + case 0xa7ba => 0xa7bb // LATIN CAPITAL LETTER GLOTTAL A + case 0xa7bc => 0xa7bd // LATIN CAPITAL LETTER GLOTTAL I + case 0xa7be => 0xa7bf // LATIN CAPITAL LETTER GLOTTAL U + case 0xa7c0 => 0xa7c1 // LATIN CAPITAL LETTER OLD POLISH O + case 0xa7c2 => 0xa7c3 // LATIN CAPITAL LETTER ANGLICANA W + case 0xa7c4 => 0xa794 // LATIN CAPITAL LETTER C WITH PALATAL HOOK + case 0xa7c5 => 0x0282 // LATIN CAPITAL LETTER S WITH HOOK + case 0xa7c6 => 0x1d8e // LATIN CAPITAL LETTER Z WITH PALATAL HOOK + case 0xa7c7 => 0xa7c8 // LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY + case 0xa7c9 => 0xa7ca // LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY + case 0xa7d0 => 0xa7d1 // LATIN CAPITAL LETTER CLOSED INSULAR G + case 0xa7d6 => 0xa7d7 // LATIN CAPITAL LETTER MIDDLE SCOTS S + case 0xa7d8 => 0xa7d9 // LATIN CAPITAL LETTER SIGMOID S + case 0xa7f5 => 0xa7f6 // LATIN CAPITAL LETTER REVERSED HALF H + case 0xab70 => 0x13a0 // CHEROKEE SMALL LETTER A + case 0xab71 => 0x13a1 // CHEROKEE SMALL LETTER E + case 0xab72 => 0x13a2 // CHEROKEE SMALL LETTER I + case 0xab73 => 0x13a3 // CHEROKEE SMALL LETTER O + case 0xab74 => 0x13a4 
// CHEROKEE SMALL LETTER U + case 0xab75 => 0x13a5 // CHEROKEE SMALL LETTER V + case 0xab76 => 0x13a6 // CHEROKEE SMALL LETTER GA + case 0xab77 => 0x13a7 // CHEROKEE SMALL LETTER KA + case 0xab78 => 0x13a8 // CHEROKEE SMALL LETTER GE + case 0xab79 => 0x13a9 // CHEROKEE SMALL LETTER GI + case 0xab7a => 0x13aa // CHEROKEE SMALL LETTER GO + case 0xab7b => 0x13ab // CHEROKEE SMALL LETTER GU + case 0xab7c => 0x13ac // CHEROKEE SMALL LETTER GV + case 0xab7d => 0x13ad // CHEROKEE SMALL LETTER HA + case 0xab7e => 0x13ae // CHEROKEE SMALL LETTER HE + case 0xab7f => 0x13af // CHEROKEE SMALL LETTER HI + case 0xab80 => 0x13b0 // CHEROKEE SMALL LETTER HO + case 0xab81 => 0x13b1 // CHEROKEE SMALL LETTER HU + case 0xab82 => 0x13b2 // CHEROKEE SMALL LETTER HV + case 0xab83 => 0x13b3 // CHEROKEE SMALL LETTER LA + case 0xab84 => 0x13b4 // CHEROKEE SMALL LETTER LE + case 0xab85 => 0x13b5 // CHEROKEE SMALL LETTER LI + case 0xab86 => 0x13b6 // CHEROKEE SMALL LETTER LO + case 0xab87 => 0x13b7 // CHEROKEE SMALL LETTER LU + case 0xab88 => 0x13b8 // CHEROKEE SMALL LETTER LV + case 0xab89 => 0x13b9 // CHEROKEE SMALL LETTER MA + case 0xab8a => 0x13ba // CHEROKEE SMALL LETTER ME + case 0xab8b => 0x13bb // CHEROKEE SMALL LETTER MI + case 0xab8c => 0x13bc // CHEROKEE SMALL LETTER MO + case 0xab8d => 0x13bd // CHEROKEE SMALL LETTER MU + case 0xab8e => 0x13be // CHEROKEE SMALL LETTER NA + case 0xab8f => 0x13bf // CHEROKEE SMALL LETTER HNA + case 0xab90 => 0x13c0 // CHEROKEE SMALL LETTER NAH + case 0xab91 => 0x13c1 // CHEROKEE SMALL LETTER NE + case 0xab92 => 0x13c2 // CHEROKEE SMALL LETTER NI + case 0xab93 => 0x13c3 // CHEROKEE SMALL LETTER NO + case 0xab94 => 0x13c4 // CHEROKEE SMALL LETTER NU + case 0xab95 => 0x13c5 // CHEROKEE SMALL LETTER NV + case 0xab96 => 0x13c6 // CHEROKEE SMALL LETTER QUA + case 0xab97 => 0x13c7 // CHEROKEE SMALL LETTER QUE + case 0xab98 => 0x13c8 // CHEROKEE SMALL LETTER QUI + case 0xab99 => 0x13c9 // CHEROKEE SMALL LETTER QUO + case 0xab9a => 0x13ca // CHEROKEE SMALL 
LETTER QUU + case 0xab9b => 0x13cb // CHEROKEE SMALL LETTER QUV + case 0xab9c => 0x13cc // CHEROKEE SMALL LETTER SA + case 0xab9d => 0x13cd // CHEROKEE SMALL LETTER S + case 0xab9e => 0x13ce // CHEROKEE SMALL LETTER SE + case 0xab9f => 0x13cf // CHEROKEE SMALL LETTER SI + case 0xaba0 => 0x13d0 // CHEROKEE SMALL LETTER SO + case 0xaba1 => 0x13d1 // CHEROKEE SMALL LETTER SU + case 0xaba2 => 0x13d2 // CHEROKEE SMALL LETTER SV + case 0xaba3 => 0x13d3 // CHEROKEE SMALL LETTER DA + case 0xaba4 => 0x13d4 // CHEROKEE SMALL LETTER TA + case 0xaba5 => 0x13d5 // CHEROKEE SMALL LETTER DE + case 0xaba6 => 0x13d6 // CHEROKEE SMALL LETTER TE + case 0xaba7 => 0x13d7 // CHEROKEE SMALL LETTER DI + case 0xaba8 => 0x13d8 // CHEROKEE SMALL LETTER TI + case 0xaba9 => 0x13d9 // CHEROKEE SMALL LETTER DO + case 0xabaa => 0x13da // CHEROKEE SMALL LETTER DU + case 0xabab => 0x13db // CHEROKEE SMALL LETTER DV + case 0xabac => 0x13dc // CHEROKEE SMALL LETTER DLA + case 0xabad => 0x13dd // CHEROKEE SMALL LETTER TLA + case 0xabae => 0x13de // CHEROKEE SMALL LETTER TLE + case 0xabaf => 0x13df // CHEROKEE SMALL LETTER TLI + case 0xabb0 => 0x13e0 // CHEROKEE SMALL LETTER TLO + case 0xabb1 => 0x13e1 // CHEROKEE SMALL LETTER TLU + case 0xabb2 => 0x13e2 // CHEROKEE SMALL LETTER TLV + case 0xabb3 => 0x13e3 // CHEROKEE SMALL LETTER TSA + case 0xabb4 => 0x13e4 // CHEROKEE SMALL LETTER TSE + case 0xabb5 => 0x13e5 // CHEROKEE SMALL LETTER TSI + case 0xabb6 => 0x13e6 // CHEROKEE SMALL LETTER TSO + case 0xabb7 => 0x13e7 // CHEROKEE SMALL LETTER TSU + case 0xabb8 => 0x13e8 // CHEROKEE SMALL LETTER TSV + case 0xabb9 => 0x13e9 // CHEROKEE SMALL LETTER WA + case 0xabba => 0x13ea // CHEROKEE SMALL LETTER WE + case 0xabbb => 0x13eb // CHEROKEE SMALL LETTER WI + case 0xabbc => 0x13ec // CHEROKEE SMALL LETTER WO + case 0xabbd => 0x13ed // CHEROKEE SMALL LETTER WU + case 0xabbe => 0x13ee // CHEROKEE SMALL LETTER WV + case 0xabbf => 0x13ef // CHEROKEE SMALL LETTER YA + case 0xff21 => 0xff41 // FULLWIDTH LATIN CAPITAL 
LETTER A + case 0xff22 => 0xff42 // FULLWIDTH LATIN CAPITAL LETTER B + case 0xff23 => 0xff43 // FULLWIDTH LATIN CAPITAL LETTER C + case 0xff24 => 0xff44 // FULLWIDTH LATIN CAPITAL LETTER D + case 0xff25 => 0xff45 // FULLWIDTH LATIN CAPITAL LETTER E + case 0xff26 => 0xff46 // FULLWIDTH LATIN CAPITAL LETTER F + case 0xff27 => 0xff47 // FULLWIDTH LATIN CAPITAL LETTER G + case 0xff28 => 0xff48 // FULLWIDTH LATIN CAPITAL LETTER H + case 0xff29 => 0xff49 // FULLWIDTH LATIN CAPITAL LETTER I + case 0xff2a => 0xff4a // FULLWIDTH LATIN CAPITAL LETTER J + case 0xff2b => 0xff4b // FULLWIDTH LATIN CAPITAL LETTER K + case 0xff2c => 0xff4c // FULLWIDTH LATIN CAPITAL LETTER L + case 0xff2d => 0xff4d // FULLWIDTH LATIN CAPITAL LETTER M + case 0xff2e => 0xff4e // FULLWIDTH LATIN CAPITAL LETTER N + case 0xff2f => 0xff4f // FULLWIDTH LATIN CAPITAL LETTER O + case 0xff30 => 0xff50 // FULLWIDTH LATIN CAPITAL LETTER P + case 0xff31 => 0xff51 // FULLWIDTH LATIN CAPITAL LETTER Q + case 0xff32 => 0xff52 // FULLWIDTH LATIN CAPITAL LETTER R + case 0xff33 => 0xff53 // FULLWIDTH LATIN CAPITAL LETTER S + case 0xff34 => 0xff54 // FULLWIDTH LATIN CAPITAL LETTER T + case 0xff35 => 0xff55 // FULLWIDTH LATIN CAPITAL LETTER U + case 0xff36 => 0xff56 // FULLWIDTH LATIN CAPITAL LETTER V + case 0xff37 => 0xff57 // FULLWIDTH LATIN CAPITAL LETTER W + case 0xff38 => 0xff58 // FULLWIDTH LATIN CAPITAL LETTER X + case 0xff39 => 0xff59 // FULLWIDTH LATIN CAPITAL LETTER Y + case 0xff3a => 0xff5a // FULLWIDTH LATIN CAPITAL LETTER Z case 0x10400 => 0x10428 // DESERET CAPITAL LETTER LONG I case 0x10401 => 0x10429 // DESERET CAPITAL LETTER LONG E - case 0x10402 => 0x1042A // DESERET CAPITAL LETTER LONG A - case 0x10403 => 0x1042B // DESERET CAPITAL LETTER LONG AH - case 0x10404 => 0x1042C // DESERET CAPITAL LETTER LONG O - case 0x10405 => 0x1042D // DESERET CAPITAL LETTER LONG OO - case 0x10406 => 0x1042E // DESERET CAPITAL LETTER SHORT I - case 0x10407 => 0x1042F // DESERET CAPITAL LETTER SHORT E + case 0x10402 => 
0x1042a // DESERET CAPITAL LETTER LONG A + case 0x10403 => 0x1042b // DESERET CAPITAL LETTER LONG AH + case 0x10404 => 0x1042c // DESERET CAPITAL LETTER LONG O + case 0x10405 => 0x1042d // DESERET CAPITAL LETTER LONG OO + case 0x10406 => 0x1042e // DESERET CAPITAL LETTER SHORT I + case 0x10407 => 0x1042f // DESERET CAPITAL LETTER SHORT E case 0x10408 => 0x10430 // DESERET CAPITAL LETTER SHORT A case 0x10409 => 0x10431 // DESERET CAPITAL LETTER SHORT AH - case 0x1040A => 0x10432 // DESERET CAPITAL LETTER SHORT O - case 0x1040B => 0x10433 // DESERET CAPITAL LETTER SHORT OO - case 0x1040C => 0x10434 // DESERET CAPITAL LETTER AY - case 0x1040D => 0x10435 // DESERET CAPITAL LETTER OW - case 0x1040E => 0x10436 // DESERET CAPITAL LETTER WU - case 0x1040F => 0x10437 // DESERET CAPITAL LETTER YEE + case 0x1040a => 0x10432 // DESERET CAPITAL LETTER SHORT O + case 0x1040b => 0x10433 // DESERET CAPITAL LETTER SHORT OO + case 0x1040c => 0x10434 // DESERET CAPITAL LETTER AY + case 0x1040d => 0x10435 // DESERET CAPITAL LETTER OW + case 0x1040e => 0x10436 // DESERET CAPITAL LETTER WU + case 0x1040f => 0x10437 // DESERET CAPITAL LETTER YEE case 0x10410 => 0x10438 // DESERET CAPITAL LETTER H case 0x10411 => 0x10439 // DESERET CAPITAL LETTER PEE - case 0x10412 => 0x1043A // DESERET CAPITAL LETTER BEE - case 0x10413 => 0x1043B // DESERET CAPITAL LETTER TEE - case 0x10414 => 0x1043C // DESERET CAPITAL LETTER DEE - case 0x10415 => 0x1043D // DESERET CAPITAL LETTER CHEE - case 0x10416 => 0x1043E // DESERET CAPITAL LETTER JEE - case 0x10417 => 0x1043F // DESERET CAPITAL LETTER KAY + case 0x10412 => 0x1043a // DESERET CAPITAL LETTER BEE + case 0x10413 => 0x1043b // DESERET CAPITAL LETTER TEE + case 0x10414 => 0x1043c // DESERET CAPITAL LETTER DEE + case 0x10415 => 0x1043d // DESERET CAPITAL LETTER CHEE + case 0x10416 => 0x1043e // DESERET CAPITAL LETTER JEE + case 0x10417 => 0x1043f // DESERET CAPITAL LETTER KAY case 0x10418 => 0x10440 // DESERET CAPITAL LETTER GAY case 0x10419 => 0x10441 
// DESERET CAPITAL LETTER EF - case 0x1041A => 0x10442 // DESERET CAPITAL LETTER VEE - case 0x1041B => 0x10443 // DESERET CAPITAL LETTER ETH - case 0x1041C => 0x10444 // DESERET CAPITAL LETTER THEE - case 0x1041D => 0x10445 // DESERET CAPITAL LETTER ES - case 0x1041E => 0x10446 // DESERET CAPITAL LETTER ZEE - case 0x1041F => 0x10447 // DESERET CAPITAL LETTER ESH + case 0x1041a => 0x10442 // DESERET CAPITAL LETTER VEE + case 0x1041b => 0x10443 // DESERET CAPITAL LETTER ETH + case 0x1041c => 0x10444 // DESERET CAPITAL LETTER THEE + case 0x1041d => 0x10445 // DESERET CAPITAL LETTER ES + case 0x1041e => 0x10446 // DESERET CAPITAL LETTER ZEE + case 0x1041f => 0x10447 // DESERET CAPITAL LETTER ESH case 0x10420 => 0x10448 // DESERET CAPITAL LETTER ZHEE case 0x10421 => 0x10449 // DESERET CAPITAL LETTER ER - case 0x10422 => 0x1044A // DESERET CAPITAL LETTER EL - case 0x10423 => 0x1044B // DESERET CAPITAL LETTER EM - case 0x10424 => 0x1044C // DESERET CAPITAL LETTER EN - case 0x10425 => 0x1044D // DESERET CAPITAL LETTER ENG - case 0x10426 => 0x1044E // DESERET CAPITAL LETTER OI - case 0x10427 => 0x1044F // DESERET CAPITAL LETTER EW - case 0x104B0 => 0x104D8 // OSAGE CAPITAL LETTER A - case 0x104B1 => 0x104D9 // OSAGE CAPITAL LETTER AI - case 0x104B2 => 0x104DA // OSAGE CAPITAL LETTER AIN - case 0x104B3 => 0x104DB // OSAGE CAPITAL LETTER AH - case 0x104B4 => 0x104DC // OSAGE CAPITAL LETTER BRA - case 0x104B5 => 0x104DD // OSAGE CAPITAL LETTER CHA - case 0x104B6 => 0x104DE // OSAGE CAPITAL LETTER EHCHA - case 0x104B7 => 0x104DF // OSAGE CAPITAL LETTER E - case 0x104B8 => 0x104E0 // OSAGE CAPITAL LETTER EIN - case 0x104B9 => 0x104E1 // OSAGE CAPITAL LETTER HA - case 0x104BA => 0x104E2 // OSAGE CAPITAL LETTER HYA - case 0x104BB => 0x104E3 // OSAGE CAPITAL LETTER I - case 0x104BC => 0x104E4 // OSAGE CAPITAL LETTER KA - case 0x104BD => 0x104E5 // OSAGE CAPITAL LETTER EHKA - case 0x104BE => 0x104E6 // OSAGE CAPITAL LETTER KYA - case 0x104BF => 0x104E7 // OSAGE CAPITAL LETTER LA - 
case 0x104C0 => 0x104E8 // OSAGE CAPITAL LETTER MA - case 0x104C1 => 0x104E9 // OSAGE CAPITAL LETTER NA - case 0x104C2 => 0x104EA // OSAGE CAPITAL LETTER O - case 0x104C3 => 0x104EB // OSAGE CAPITAL LETTER OIN - case 0x104C4 => 0x104EC // OSAGE CAPITAL LETTER PA - case 0x104C5 => 0x104ED // OSAGE CAPITAL LETTER EHPA - case 0x104C6 => 0x104EE // OSAGE CAPITAL LETTER SA - case 0x104C7 => 0x104EF // OSAGE CAPITAL LETTER SHA - case 0x104C8 => 0x104F0 // OSAGE CAPITAL LETTER TA - case 0x104C9 => 0x104F1 // OSAGE CAPITAL LETTER EHTA - case 0x104CA => 0x104F2 // OSAGE CAPITAL LETTER TSA - case 0x104CB => 0x104F3 // OSAGE CAPITAL LETTER EHTSA - case 0x104CC => 0x104F4 // OSAGE CAPITAL LETTER TSHA - case 0x104CD => 0x104F5 // OSAGE CAPITAL LETTER DHA - case 0x104CE => 0x104F6 // OSAGE CAPITAL LETTER U - case 0x104CF => 0x104F7 // OSAGE CAPITAL LETTER WA - case 0x104D0 => 0x104F8 // OSAGE CAPITAL LETTER KHA - case 0x104D1 => 0x104F9 // OSAGE CAPITAL LETTER GHA - case 0x104D2 => 0x104FA // OSAGE CAPITAL LETTER ZA - case 0x104D3 => 0x104FB // OSAGE CAPITAL LETTER ZHA + case 0x10422 => 0x1044a // DESERET CAPITAL LETTER EL + case 0x10423 => 0x1044b // DESERET CAPITAL LETTER EM + case 0x10424 => 0x1044c // DESERET CAPITAL LETTER EN + case 0x10425 => 0x1044d // DESERET CAPITAL LETTER ENG + case 0x10426 => 0x1044e // DESERET CAPITAL LETTER OI + case 0x10427 => 0x1044f // DESERET CAPITAL LETTER EW + case 0x104b0 => 0x104d8 // OSAGE CAPITAL LETTER A + case 0x104b1 => 0x104d9 // OSAGE CAPITAL LETTER AI + case 0x104b2 => 0x104da // OSAGE CAPITAL LETTER AIN + case 0x104b3 => 0x104db // OSAGE CAPITAL LETTER AH + case 0x104b4 => 0x104dc // OSAGE CAPITAL LETTER BRA + case 0x104b5 => 0x104dd // OSAGE CAPITAL LETTER CHA + case 0x104b6 => 0x104de // OSAGE CAPITAL LETTER EHCHA + case 0x104b7 => 0x104df // OSAGE CAPITAL LETTER E + case 0x104b8 => 0x104e0 // OSAGE CAPITAL LETTER EIN + case 0x104b9 => 0x104e1 // OSAGE CAPITAL LETTER HA + case 0x104ba => 0x104e2 // OSAGE CAPITAL LETTER HYA + case 
0x104bb => 0x104e3 // OSAGE CAPITAL LETTER I + case 0x104bc => 0x104e4 // OSAGE CAPITAL LETTER KA + case 0x104bd => 0x104e5 // OSAGE CAPITAL LETTER EHKA + case 0x104be => 0x104e6 // OSAGE CAPITAL LETTER KYA + case 0x104bf => 0x104e7 // OSAGE CAPITAL LETTER LA + case 0x104c0 => 0x104e8 // OSAGE CAPITAL LETTER MA + case 0x104c1 => 0x104e9 // OSAGE CAPITAL LETTER NA + case 0x104c2 => 0x104ea // OSAGE CAPITAL LETTER O + case 0x104c3 => 0x104eb // OSAGE CAPITAL LETTER OIN + case 0x104c4 => 0x104ec // OSAGE CAPITAL LETTER PA + case 0x104c5 => 0x104ed // OSAGE CAPITAL LETTER EHPA + case 0x104c6 => 0x104ee // OSAGE CAPITAL LETTER SA + case 0x104c7 => 0x104ef // OSAGE CAPITAL LETTER SHA + case 0x104c8 => 0x104f0 // OSAGE CAPITAL LETTER TA + case 0x104c9 => 0x104f1 // OSAGE CAPITAL LETTER EHTA + case 0x104ca => 0x104f2 // OSAGE CAPITAL LETTER TSA + case 0x104cb => 0x104f3 // OSAGE CAPITAL LETTER EHTSA + case 0x104cc => 0x104f4 // OSAGE CAPITAL LETTER TSHA + case 0x104cd => 0x104f5 // OSAGE CAPITAL LETTER DHA + case 0x104ce => 0x104f6 // OSAGE CAPITAL LETTER U + case 0x104cf => 0x104f7 // OSAGE CAPITAL LETTER WA + case 0x104d0 => 0x104f8 // OSAGE CAPITAL LETTER KHA + case 0x104d1 => 0x104f9 // OSAGE CAPITAL LETTER GHA + case 0x104d2 => 0x104fa // OSAGE CAPITAL LETTER ZA + case 0x104d3 => 0x104fb // OSAGE CAPITAL LETTER ZHA case 0x10570 => 0x10597 // VITHKUQI CAPITAL LETTER A case 0x10571 => 0x10598 // VITHKUQI CAPITAL LETTER BBE case 0x10572 => 0x10599 // VITHKUQI CAPITAL LETTER BE - case 0x10573 => 0x1059A // VITHKUQI CAPITAL LETTER CE - case 0x10574 => 0x1059B // VITHKUQI CAPITAL LETTER CHE - case 0x10575 => 0x1059C // VITHKUQI CAPITAL LETTER DE - case 0x10576 => 0x1059D // VITHKUQI CAPITAL LETTER DHE - case 0x10577 => 0x1059E // VITHKUQI CAPITAL LETTER EI - case 0x10578 => 0x1059F // VITHKUQI CAPITAL LETTER E - case 0x10579 => 0x105A0 // VITHKUQI CAPITAL LETTER FE - case 0x1057A => 0x105A1 // VITHKUQI CAPITAL LETTER GA - case 0x1057C => 0x105A3 // VITHKUQI CAPITAL LETTER 
HA - case 0x1057D => 0x105A4 // VITHKUQI CAPITAL LETTER HHA - case 0x1057E => 0x105A5 // VITHKUQI CAPITAL LETTER I - case 0x1057F => 0x105A6 // VITHKUQI CAPITAL LETTER IJE - case 0x10580 => 0x105A7 // VITHKUQI CAPITAL LETTER JE - case 0x10581 => 0x105A8 // VITHKUQI CAPITAL LETTER KA - case 0x10582 => 0x105A9 // VITHKUQI CAPITAL LETTER LA - case 0x10583 => 0x105AA // VITHKUQI CAPITAL LETTER LLA - case 0x10584 => 0x105AB // VITHKUQI CAPITAL LETTER ME - case 0x10585 => 0x105AC // VITHKUQI CAPITAL LETTER NE - case 0x10586 => 0x105AD // VITHKUQI CAPITAL LETTER NJE - case 0x10587 => 0x105AE // VITHKUQI CAPITAL LETTER O - case 0x10588 => 0x105AF // VITHKUQI CAPITAL LETTER PE - case 0x10589 => 0x105B0 // VITHKUQI CAPITAL LETTER QA - case 0x1058A => 0x105B1 // VITHKUQI CAPITAL LETTER RE - case 0x1058C => 0x105B3 // VITHKUQI CAPITAL LETTER SE - case 0x1058D => 0x105B4 // VITHKUQI CAPITAL LETTER SHE - case 0x1058E => 0x105B5 // VITHKUQI CAPITAL LETTER TE - case 0x1058F => 0x105B6 // VITHKUQI CAPITAL LETTER THE - case 0x10590 => 0x105B7 // VITHKUQI CAPITAL LETTER U - case 0x10591 => 0x105B8 // VITHKUQI CAPITAL LETTER VE - case 0x10592 => 0x105B9 // VITHKUQI CAPITAL LETTER XE - case 0x10594 => 0x105BB // VITHKUQI CAPITAL LETTER Y - case 0x10595 => 0x105BC // VITHKUQI CAPITAL LETTER ZE - case 0x10C80 => 0x10CC0 // OLD HUNGARIAN CAPITAL LETTER A - case 0x10C81 => 0x10CC1 // OLD HUNGARIAN CAPITAL LETTER AA - case 0x10C82 => 0x10CC2 // OLD HUNGARIAN CAPITAL LETTER EB - case 0x10C83 => 0x10CC3 // OLD HUNGARIAN CAPITAL LETTER AMB - case 0x10C84 => 0x10CC4 // OLD HUNGARIAN CAPITAL LETTER EC - case 0x10C85 => 0x10CC5 // OLD HUNGARIAN CAPITAL LETTER ENC - case 0x10C86 => 0x10CC6 // OLD HUNGARIAN CAPITAL LETTER ECS - case 0x10C87 => 0x10CC7 // OLD HUNGARIAN CAPITAL LETTER ED - case 0x10C88 => 0x10CC8 // OLD HUNGARIAN CAPITAL LETTER AND - case 0x10C89 => 0x10CC9 // OLD HUNGARIAN CAPITAL LETTER E - case 0x10C8A => 0x10CCA // OLD HUNGARIAN CAPITAL LETTER CLOSE E - case 0x10C8B => 0x10CCB // 
OLD HUNGARIAN CAPITAL LETTER EE - case 0x10C8C => 0x10CCC // OLD HUNGARIAN CAPITAL LETTER EF - case 0x10C8D => 0x10CCD // OLD HUNGARIAN CAPITAL LETTER EG - case 0x10C8E => 0x10CCE // OLD HUNGARIAN CAPITAL LETTER EGY - case 0x10C8F => 0x10CCF // OLD HUNGARIAN CAPITAL LETTER EH - case 0x10C90 => 0x10CD0 // OLD HUNGARIAN CAPITAL LETTER I - case 0x10C91 => 0x10CD1 // OLD HUNGARIAN CAPITAL LETTER II - case 0x10C92 => 0x10CD2 // OLD HUNGARIAN CAPITAL LETTER EJ - case 0x10C93 => 0x10CD3 // OLD HUNGARIAN CAPITAL LETTER EK - case 0x10C94 => 0x10CD4 // OLD HUNGARIAN CAPITAL LETTER AK - case 0x10C95 => 0x10CD5 // OLD HUNGARIAN CAPITAL LETTER UNK - case 0x10C96 => 0x10CD6 // OLD HUNGARIAN CAPITAL LETTER EL - case 0x10C97 => 0x10CD7 // OLD HUNGARIAN CAPITAL LETTER ELY - case 0x10C98 => 0x10CD8 // OLD HUNGARIAN CAPITAL LETTER EM - case 0x10C99 => 0x10CD9 // OLD HUNGARIAN CAPITAL LETTER EN - case 0x10C9A => 0x10CDA // OLD HUNGARIAN CAPITAL LETTER ENY - case 0x10C9B => 0x10CDB // OLD HUNGARIAN CAPITAL LETTER O - case 0x10C9C => 0x10CDC // OLD HUNGARIAN CAPITAL LETTER OO - case 0x10C9D => 0x10CDD // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE - case 0x10C9E => 0x10CDE // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE - case 0x10C9F => 0x10CDF // OLD HUNGARIAN CAPITAL LETTER OEE - case 0x10CA0 => 0x10CE0 // OLD HUNGARIAN CAPITAL LETTER EP - case 0x10CA1 => 0x10CE1 // OLD HUNGARIAN CAPITAL LETTER EMP - case 0x10CA2 => 0x10CE2 // OLD HUNGARIAN CAPITAL LETTER ER - case 0x10CA3 => 0x10CE3 // OLD HUNGARIAN CAPITAL LETTER SHORT ER - case 0x10CA4 => 0x10CE4 // OLD HUNGARIAN CAPITAL LETTER ES - case 0x10CA5 => 0x10CE5 // OLD HUNGARIAN CAPITAL LETTER ESZ - case 0x10CA6 => 0x10CE6 // OLD HUNGARIAN CAPITAL LETTER ET - case 0x10CA7 => 0x10CE7 // OLD HUNGARIAN CAPITAL LETTER ENT - case 0x10CA8 => 0x10CE8 // OLD HUNGARIAN CAPITAL LETTER ETY - case 0x10CA9 => 0x10CE9 // OLD HUNGARIAN CAPITAL LETTER ECH - case 0x10CAA => 0x10CEA // OLD HUNGARIAN CAPITAL LETTER U - case 0x10CAB => 0x10CEB // OLD HUNGARIAN 
CAPITAL LETTER UU - case 0x10CAC => 0x10CEC // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE - case 0x10CAD => 0x10CED // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE - case 0x10CAE => 0x10CEE // OLD HUNGARIAN CAPITAL LETTER EV - case 0x10CAF => 0x10CEF // OLD HUNGARIAN CAPITAL LETTER EZ - case 0x10CB0 => 0x10CF0 // OLD HUNGARIAN CAPITAL LETTER EZS - case 0x10CB1 => 0x10CF1 // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN - case 0x10CB2 => 0x10CF2 // OLD HUNGARIAN CAPITAL LETTER US - case 0x118A0 => 0x118C0 // WARANG CITI CAPITAL LETTER NGAA - case 0x118A1 => 0x118C1 // WARANG CITI CAPITAL LETTER A - case 0x118A2 => 0x118C2 // WARANG CITI CAPITAL LETTER WI - case 0x118A3 => 0x118C3 // WARANG CITI CAPITAL LETTER YU - case 0x118A4 => 0x118C4 // WARANG CITI CAPITAL LETTER YA - case 0x118A5 => 0x118C5 // WARANG CITI CAPITAL LETTER YO - case 0x118A6 => 0x118C6 // WARANG CITI CAPITAL LETTER II - case 0x118A7 => 0x118C7 // WARANG CITI CAPITAL LETTER UU - case 0x118A8 => 0x118C8 // WARANG CITI CAPITAL LETTER E - case 0x118A9 => 0x118C9 // WARANG CITI CAPITAL LETTER O - case 0x118AA => 0x118CA // WARANG CITI CAPITAL LETTER ANG - case 0x118AB => 0x118CB // WARANG CITI CAPITAL LETTER GA - case 0x118AC => 0x118CC // WARANG CITI CAPITAL LETTER KO - case 0x118AD => 0x118CD // WARANG CITI CAPITAL LETTER ENY - case 0x118AE => 0x118CE // WARANG CITI CAPITAL LETTER YUJ - case 0x118AF => 0x118CF // WARANG CITI CAPITAL LETTER UC - case 0x118B0 => 0x118D0 // WARANG CITI CAPITAL LETTER ENN - case 0x118B1 => 0x118D1 // WARANG CITI CAPITAL LETTER ODD - case 0x118B2 => 0x118D2 // WARANG CITI CAPITAL LETTER TTE - case 0x118B3 => 0x118D3 // WARANG CITI CAPITAL LETTER NUNG - case 0x118B4 => 0x118D4 // WARANG CITI CAPITAL LETTER DA - case 0x118B5 => 0x118D5 // WARANG CITI CAPITAL LETTER AT - case 0x118B6 => 0x118D6 // WARANG CITI CAPITAL LETTER AM - case 0x118B7 => 0x118D7 // WARANG CITI CAPITAL LETTER BU - case 0x118B8 => 0x118D8 // WARANG CITI CAPITAL LETTER PU - case 0x118B9 => 0x118D9 // WARANG 
CITI CAPITAL LETTER HIYO - case 0x118BA => 0x118DA // WARANG CITI CAPITAL LETTER HOLO - case 0x118BB => 0x118DB // WARANG CITI CAPITAL LETTER HORR - case 0x118BC => 0x118DC // WARANG CITI CAPITAL LETTER HAR - case 0x118BD => 0x118DD // WARANG CITI CAPITAL LETTER SSUU - case 0x118BE => 0x118DE // WARANG CITI CAPITAL LETTER SII - case 0x118BF => 0x118DF // WARANG CITI CAPITAL LETTER VIYO - case 0x16E40 => 0x16E60 // MEDEFAIDRIN CAPITAL LETTER M - case 0x16E41 => 0x16E61 // MEDEFAIDRIN CAPITAL LETTER S - case 0x16E42 => 0x16E62 // MEDEFAIDRIN CAPITAL LETTER V - case 0x16E43 => 0x16E63 // MEDEFAIDRIN CAPITAL LETTER W - case 0x16E44 => 0x16E64 // MEDEFAIDRIN CAPITAL LETTER ATIU - case 0x16E45 => 0x16E65 // MEDEFAIDRIN CAPITAL LETTER Z - case 0x16E46 => 0x16E66 // MEDEFAIDRIN CAPITAL LETTER KP - case 0x16E47 => 0x16E67 // MEDEFAIDRIN CAPITAL LETTER P - case 0x16E48 => 0x16E68 // MEDEFAIDRIN CAPITAL LETTER T - case 0x16E49 => 0x16E69 // MEDEFAIDRIN CAPITAL LETTER G - case 0x16E4A => 0x16E6A // MEDEFAIDRIN CAPITAL LETTER F - case 0x16E4B => 0x16E6B // MEDEFAIDRIN CAPITAL LETTER I - case 0x16E4C => 0x16E6C // MEDEFAIDRIN CAPITAL LETTER K - case 0x16E4D => 0x16E6D // MEDEFAIDRIN CAPITAL LETTER A - case 0x16E4E => 0x16E6E // MEDEFAIDRIN CAPITAL LETTER J - case 0x16E4F => 0x16E6F // MEDEFAIDRIN CAPITAL LETTER E - case 0x16E50 => 0x16E70 // MEDEFAIDRIN CAPITAL LETTER B - case 0x16E51 => 0x16E71 // MEDEFAIDRIN CAPITAL LETTER C - case 0x16E52 => 0x16E72 // MEDEFAIDRIN CAPITAL LETTER U - case 0x16E53 => 0x16E73 // MEDEFAIDRIN CAPITAL LETTER YU - case 0x16E54 => 0x16E74 // MEDEFAIDRIN CAPITAL LETTER L - case 0x16E55 => 0x16E75 // MEDEFAIDRIN CAPITAL LETTER Q - case 0x16E56 => 0x16E76 // MEDEFAIDRIN CAPITAL LETTER HP - case 0x16E57 => 0x16E77 // MEDEFAIDRIN CAPITAL LETTER NY - case 0x16E58 => 0x16E78 // MEDEFAIDRIN CAPITAL LETTER X - case 0x16E59 => 0x16E79 // MEDEFAIDRIN CAPITAL LETTER D - case 0x16E5A => 0x16E7A // MEDEFAIDRIN CAPITAL LETTER OE - case 0x16E5B => 0x16E7B // 
MEDEFAIDRIN CAPITAL LETTER N - case 0x16E5C => 0x16E7C // MEDEFAIDRIN CAPITAL LETTER R - case 0x16E5D => 0x16E7D // MEDEFAIDRIN CAPITAL LETTER O - case 0x16E5E => 0x16E7E // MEDEFAIDRIN CAPITAL LETTER AI - case 0x16E5F => 0x16E7F // MEDEFAIDRIN CAPITAL LETTER Y - case 0x1E900 => 0x1E922 // ADLAM CAPITAL LETTER ALIF - case 0x1E901 => 0x1E923 // ADLAM CAPITAL LETTER DAALI - case 0x1E902 => 0x1E924 // ADLAM CAPITAL LETTER LAAM - case 0x1E903 => 0x1E925 // ADLAM CAPITAL LETTER MIIM - case 0x1E904 => 0x1E926 // ADLAM CAPITAL LETTER BA - case 0x1E905 => 0x1E927 // ADLAM CAPITAL LETTER SINNYIIYHE - case 0x1E906 => 0x1E928 // ADLAM CAPITAL LETTER PE - case 0x1E907 => 0x1E929 // ADLAM CAPITAL LETTER BHE - case 0x1E908 => 0x1E92A // ADLAM CAPITAL LETTER RA - case 0x1E909 => 0x1E92B // ADLAM CAPITAL LETTER E - case 0x1E90A => 0x1E92C // ADLAM CAPITAL LETTER FA - case 0x1E90B => 0x1E92D // ADLAM CAPITAL LETTER I - case 0x1E90C => 0x1E92E // ADLAM CAPITAL LETTER O - case 0x1E90D => 0x1E92F // ADLAM CAPITAL LETTER DHA - case 0x1E90E => 0x1E930 // ADLAM CAPITAL LETTER YHE - case 0x1E90F => 0x1E931 // ADLAM CAPITAL LETTER WAW - case 0x1E910 => 0x1E932 // ADLAM CAPITAL LETTER NUN - case 0x1E911 => 0x1E933 // ADLAM CAPITAL LETTER KAF - case 0x1E912 => 0x1E934 // ADLAM CAPITAL LETTER YA - case 0x1E913 => 0x1E935 // ADLAM CAPITAL LETTER U - case 0x1E914 => 0x1E936 // ADLAM CAPITAL LETTER JIIM - case 0x1E915 => 0x1E937 // ADLAM CAPITAL LETTER CHI - case 0x1E916 => 0x1E938 // ADLAM CAPITAL LETTER HA - case 0x1E917 => 0x1E939 // ADLAM CAPITAL LETTER QAAF - case 0x1E918 => 0x1E93A // ADLAM CAPITAL LETTER GA - case 0x1E919 => 0x1E93B // ADLAM CAPITAL LETTER NYA - case 0x1E91A => 0x1E93C // ADLAM CAPITAL LETTER TU - case 0x1E91B => 0x1E93D // ADLAM CAPITAL LETTER NHA - case 0x1E91C => 0x1E93E // ADLAM CAPITAL LETTER VA - case 0x1E91D => 0x1E93F // ADLAM CAPITAL LETTER KHA - case 0x1E91E => 0x1E940 // ADLAM CAPITAL LETTER GBE - case 0x1E91F => 0x1E941 // ADLAM CAPITAL LETTER ZAL - case 
0x1E920 => 0x1E942 // ADLAM CAPITAL LETTER KPO - case 0x1E921 => 0x1E943 // ADLAM CAPITAL LETTER SHA + case 0x10573 => 0x1059a // VITHKUQI CAPITAL LETTER CE + case 0x10574 => 0x1059b // VITHKUQI CAPITAL LETTER CHE + case 0x10575 => 0x1059c // VITHKUQI CAPITAL LETTER DE + case 0x10576 => 0x1059d // VITHKUQI CAPITAL LETTER DHE + case 0x10577 => 0x1059e // VITHKUQI CAPITAL LETTER EI + case 0x10578 => 0x1059f // VITHKUQI CAPITAL LETTER E + case 0x10579 => 0x105a0 // VITHKUQI CAPITAL LETTER FE + case 0x1057a => 0x105a1 // VITHKUQI CAPITAL LETTER GA + case 0x1057c => 0x105a3 // VITHKUQI CAPITAL LETTER HA + case 0x1057d => 0x105a4 // VITHKUQI CAPITAL LETTER HHA + case 0x1057e => 0x105a5 // VITHKUQI CAPITAL LETTER I + case 0x1057f => 0x105a6 // VITHKUQI CAPITAL LETTER IJE + case 0x10580 => 0x105a7 // VITHKUQI CAPITAL LETTER JE + case 0x10581 => 0x105a8 // VITHKUQI CAPITAL LETTER KA + case 0x10582 => 0x105a9 // VITHKUQI CAPITAL LETTER LA + case 0x10583 => 0x105aa // VITHKUQI CAPITAL LETTER LLA + case 0x10584 => 0x105ab // VITHKUQI CAPITAL LETTER ME + case 0x10585 => 0x105ac // VITHKUQI CAPITAL LETTER NE + case 0x10586 => 0x105ad // VITHKUQI CAPITAL LETTER NJE + case 0x10587 => 0x105ae // VITHKUQI CAPITAL LETTER O + case 0x10588 => 0x105af // VITHKUQI CAPITAL LETTER PE + case 0x10589 => 0x105b0 // VITHKUQI CAPITAL LETTER QA + case 0x1058a => 0x105b1 // VITHKUQI CAPITAL LETTER RE + case 0x1058c => 0x105b3 // VITHKUQI CAPITAL LETTER SE + case 0x1058d => 0x105b4 // VITHKUQI CAPITAL LETTER SHE + case 0x1058e => 0x105b5 // VITHKUQI CAPITAL LETTER TE + case 0x1058f => 0x105b6 // VITHKUQI CAPITAL LETTER THE + case 0x10590 => 0x105b7 // VITHKUQI CAPITAL LETTER U + case 0x10591 => 0x105b8 // VITHKUQI CAPITAL LETTER VE + case 0x10592 => 0x105b9 // VITHKUQI CAPITAL LETTER XE + case 0x10594 => 0x105bb // VITHKUQI CAPITAL LETTER Y + case 0x10595 => 0x105bc // VITHKUQI CAPITAL LETTER ZE + case 0x10c80 => 0x10cc0 // OLD HUNGARIAN CAPITAL LETTER A + case 0x10c81 => 0x10cc1 // OLD HUNGARIAN 
CAPITAL LETTER AA + case 0x10c82 => 0x10cc2 // OLD HUNGARIAN CAPITAL LETTER EB + case 0x10c83 => 0x10cc3 // OLD HUNGARIAN CAPITAL LETTER AMB + case 0x10c84 => 0x10cc4 // OLD HUNGARIAN CAPITAL LETTER EC + case 0x10c85 => 0x10cc5 // OLD HUNGARIAN CAPITAL LETTER ENC + case 0x10c86 => 0x10cc6 // OLD HUNGARIAN CAPITAL LETTER ECS + case 0x10c87 => 0x10cc7 // OLD HUNGARIAN CAPITAL LETTER ED + case 0x10c88 => 0x10cc8 // OLD HUNGARIAN CAPITAL LETTER AND + case 0x10c89 => 0x10cc9 // OLD HUNGARIAN CAPITAL LETTER E + case 0x10c8a => 0x10cca // OLD HUNGARIAN CAPITAL LETTER CLOSE E + case 0x10c8b => 0x10ccb // OLD HUNGARIAN CAPITAL LETTER EE + case 0x10c8c => 0x10ccc // OLD HUNGARIAN CAPITAL LETTER EF + case 0x10c8d => 0x10ccd // OLD HUNGARIAN CAPITAL LETTER EG + case 0x10c8e => 0x10cce // OLD HUNGARIAN CAPITAL LETTER EGY + case 0x10c8f => 0x10ccf // OLD HUNGARIAN CAPITAL LETTER EH + case 0x10c90 => 0x10cd0 // OLD HUNGARIAN CAPITAL LETTER I + case 0x10c91 => 0x10cd1 // OLD HUNGARIAN CAPITAL LETTER II + case 0x10c92 => 0x10cd2 // OLD HUNGARIAN CAPITAL LETTER EJ + case 0x10c93 => 0x10cd3 // OLD HUNGARIAN CAPITAL LETTER EK + case 0x10c94 => 0x10cd4 // OLD HUNGARIAN CAPITAL LETTER AK + case 0x10c95 => 0x10cd5 // OLD HUNGARIAN CAPITAL LETTER UNK + case 0x10c96 => 0x10cd6 // OLD HUNGARIAN CAPITAL LETTER EL + case 0x10c97 => 0x10cd7 // OLD HUNGARIAN CAPITAL LETTER ELY + case 0x10c98 => 0x10cd8 // OLD HUNGARIAN CAPITAL LETTER EM + case 0x10c99 => 0x10cd9 // OLD HUNGARIAN CAPITAL LETTER EN + case 0x10c9a => 0x10cda // OLD HUNGARIAN CAPITAL LETTER ENY + case 0x10c9b => 0x10cdb // OLD HUNGARIAN CAPITAL LETTER O + case 0x10c9c => 0x10cdc // OLD HUNGARIAN CAPITAL LETTER OO + case 0x10c9d => 0x10cdd // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE + case 0x10c9e => 0x10cde // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE + case 0x10c9f => 0x10cdf // OLD HUNGARIAN CAPITAL LETTER OEE + case 0x10ca0 => 0x10ce0 // OLD HUNGARIAN CAPITAL LETTER EP + case 0x10ca1 => 0x10ce1 // OLD HUNGARIAN CAPITAL LETTER 
EMP + case 0x10ca2 => 0x10ce2 // OLD HUNGARIAN CAPITAL LETTER ER + case 0x10ca3 => 0x10ce3 // OLD HUNGARIAN CAPITAL LETTER SHORT ER + case 0x10ca4 => 0x10ce4 // OLD HUNGARIAN CAPITAL LETTER ES + case 0x10ca5 => 0x10ce5 // OLD HUNGARIAN CAPITAL LETTER ESZ + case 0x10ca6 => 0x10ce6 // OLD HUNGARIAN CAPITAL LETTER ET + case 0x10ca7 => 0x10ce7 // OLD HUNGARIAN CAPITAL LETTER ENT + case 0x10ca8 => 0x10ce8 // OLD HUNGARIAN CAPITAL LETTER ETY + case 0x10ca9 => 0x10ce9 // OLD HUNGARIAN CAPITAL LETTER ECH + case 0x10caa => 0x10cea // OLD HUNGARIAN CAPITAL LETTER U + case 0x10cab => 0x10ceb // OLD HUNGARIAN CAPITAL LETTER UU + case 0x10cac => 0x10cec // OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE + case 0x10cad => 0x10ced // OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE + case 0x10cae => 0x10cee // OLD HUNGARIAN CAPITAL LETTER EV + case 0x10caf => 0x10cef // OLD HUNGARIAN CAPITAL LETTER EZ + case 0x10cb0 => 0x10cf0 // OLD HUNGARIAN CAPITAL LETTER EZS + case 0x10cb1 => 0x10cf1 // OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN + case 0x10cb2 => 0x10cf2 // OLD HUNGARIAN CAPITAL LETTER US + case 0x118a0 => 0x118c0 // WARANG CITI CAPITAL LETTER NGAA + case 0x118a1 => 0x118c1 // WARANG CITI CAPITAL LETTER A + case 0x118a2 => 0x118c2 // WARANG CITI CAPITAL LETTER WI + case 0x118a3 => 0x118c3 // WARANG CITI CAPITAL LETTER YU + case 0x118a4 => 0x118c4 // WARANG CITI CAPITAL LETTER YA + case 0x118a5 => 0x118c5 // WARANG CITI CAPITAL LETTER YO + case 0x118a6 => 0x118c6 // WARANG CITI CAPITAL LETTER II + case 0x118a7 => 0x118c7 // WARANG CITI CAPITAL LETTER UU + case 0x118a8 => 0x118c8 // WARANG CITI CAPITAL LETTER E + case 0x118a9 => 0x118c9 // WARANG CITI CAPITAL LETTER O + case 0x118aa => 0x118ca // WARANG CITI CAPITAL LETTER ANG + case 0x118ab => 0x118cb // WARANG CITI CAPITAL LETTER GA + case 0x118ac => 0x118cc // WARANG CITI CAPITAL LETTER KO + case 0x118ad => 0x118cd // WARANG CITI CAPITAL LETTER ENY + case 0x118ae => 0x118ce // WARANG CITI CAPITAL LETTER YUJ + case 0x118af => 0x118cf 
// WARANG CITI CAPITAL LETTER UC + case 0x118b0 => 0x118d0 // WARANG CITI CAPITAL LETTER ENN + case 0x118b1 => 0x118d1 // WARANG CITI CAPITAL LETTER ODD + case 0x118b2 => 0x118d2 // WARANG CITI CAPITAL LETTER TTE + case 0x118b3 => 0x118d3 // WARANG CITI CAPITAL LETTER NUNG + case 0x118b4 => 0x118d4 // WARANG CITI CAPITAL LETTER DA + case 0x118b5 => 0x118d5 // WARANG CITI CAPITAL LETTER AT + case 0x118b6 => 0x118d6 // WARANG CITI CAPITAL LETTER AM + case 0x118b7 => 0x118d7 // WARANG CITI CAPITAL LETTER BU + case 0x118b8 => 0x118d8 // WARANG CITI CAPITAL LETTER PU + case 0x118b9 => 0x118d9 // WARANG CITI CAPITAL LETTER HIYO + case 0x118ba => 0x118da // WARANG CITI CAPITAL LETTER HOLO + case 0x118bb => 0x118db // WARANG CITI CAPITAL LETTER HORR + case 0x118bc => 0x118dc // WARANG CITI CAPITAL LETTER HAR + case 0x118bd => 0x118dd // WARANG CITI CAPITAL LETTER SSUU + case 0x118be => 0x118de // WARANG CITI CAPITAL LETTER SII + case 0x118bf => 0x118df // WARANG CITI CAPITAL LETTER VIYO + case 0x16e40 => 0x16e60 // MEDEFAIDRIN CAPITAL LETTER M + case 0x16e41 => 0x16e61 // MEDEFAIDRIN CAPITAL LETTER S + case 0x16e42 => 0x16e62 // MEDEFAIDRIN CAPITAL LETTER V + case 0x16e43 => 0x16e63 // MEDEFAIDRIN CAPITAL LETTER W + case 0x16e44 => 0x16e64 // MEDEFAIDRIN CAPITAL LETTER ATIU + case 0x16e45 => 0x16e65 // MEDEFAIDRIN CAPITAL LETTER Z + case 0x16e46 => 0x16e66 // MEDEFAIDRIN CAPITAL LETTER KP + case 0x16e47 => 0x16e67 // MEDEFAIDRIN CAPITAL LETTER P + case 0x16e48 => 0x16e68 // MEDEFAIDRIN CAPITAL LETTER T + case 0x16e49 => 0x16e69 // MEDEFAIDRIN CAPITAL LETTER G + case 0x16e4a => 0x16e6a // MEDEFAIDRIN CAPITAL LETTER F + case 0x16e4b => 0x16e6b // MEDEFAIDRIN CAPITAL LETTER I + case 0x16e4c => 0x16e6c // MEDEFAIDRIN CAPITAL LETTER K + case 0x16e4d => 0x16e6d // MEDEFAIDRIN CAPITAL LETTER A + case 0x16e4e => 0x16e6e // MEDEFAIDRIN CAPITAL LETTER J + case 0x16e4f => 0x16e6f // MEDEFAIDRIN CAPITAL LETTER E + case 0x16e50 => 0x16e70 // MEDEFAIDRIN CAPITAL LETTER B + case 0x16e51 
=> 0x16e71 // MEDEFAIDRIN CAPITAL LETTER C + case 0x16e52 => 0x16e72 // MEDEFAIDRIN CAPITAL LETTER U + case 0x16e53 => 0x16e73 // MEDEFAIDRIN CAPITAL LETTER YU + case 0x16e54 => 0x16e74 // MEDEFAIDRIN CAPITAL LETTER L + case 0x16e55 => 0x16e75 // MEDEFAIDRIN CAPITAL LETTER Q + case 0x16e56 => 0x16e76 // MEDEFAIDRIN CAPITAL LETTER HP + case 0x16e57 => 0x16e77 // MEDEFAIDRIN CAPITAL LETTER NY + case 0x16e58 => 0x16e78 // MEDEFAIDRIN CAPITAL LETTER X + case 0x16e59 => 0x16e79 // MEDEFAIDRIN CAPITAL LETTER D + case 0x16e5a => 0x16e7a // MEDEFAIDRIN CAPITAL LETTER OE + case 0x16e5b => 0x16e7b // MEDEFAIDRIN CAPITAL LETTER N + case 0x16e5c => 0x16e7c // MEDEFAIDRIN CAPITAL LETTER R + case 0x16e5d => 0x16e7d // MEDEFAIDRIN CAPITAL LETTER O + case 0x16e5e => 0x16e7e // MEDEFAIDRIN CAPITAL LETTER AI + case 0x16e5f => 0x16e7f // MEDEFAIDRIN CAPITAL LETTER Y + case 0x1e900 => 0x1e922 // ADLAM CAPITAL LETTER ALIF + case 0x1e901 => 0x1e923 // ADLAM CAPITAL LETTER DAALI + case 0x1e902 => 0x1e924 // ADLAM CAPITAL LETTER LAAM + case 0x1e903 => 0x1e925 // ADLAM CAPITAL LETTER MIIM + case 0x1e904 => 0x1e926 // ADLAM CAPITAL LETTER BA + case 0x1e905 => 0x1e927 // ADLAM CAPITAL LETTER SINNYIIYHE + case 0x1e906 => 0x1e928 // ADLAM CAPITAL LETTER PE + case 0x1e907 => 0x1e929 // ADLAM CAPITAL LETTER BHE + case 0x1e908 => 0x1e92a // ADLAM CAPITAL LETTER RA + case 0x1e909 => 0x1e92b // ADLAM CAPITAL LETTER E + case 0x1e90a => 0x1e92c // ADLAM CAPITAL LETTER FA + case 0x1e90b => 0x1e92d // ADLAM CAPITAL LETTER I + case 0x1e90c => 0x1e92e // ADLAM CAPITAL LETTER O + case 0x1e90d => 0x1e92f // ADLAM CAPITAL LETTER DHA + case 0x1e90e => 0x1e930 // ADLAM CAPITAL LETTER YHE + case 0x1e90f => 0x1e931 // ADLAM CAPITAL LETTER WAW + case 0x1e910 => 0x1e932 // ADLAM CAPITAL LETTER NUN + case 0x1e911 => 0x1e933 // ADLAM CAPITAL LETTER KAF + case 0x1e912 => 0x1e934 // ADLAM CAPITAL LETTER YA + case 0x1e913 => 0x1e935 // ADLAM CAPITAL LETTER U + case 0x1e914 => 0x1e936 // ADLAM CAPITAL LETTER JIIM + 
case 0x1e915 => 0x1e937 // ADLAM CAPITAL LETTER CHI + case 0x1e916 => 0x1e938 // ADLAM CAPITAL LETTER HA + case 0x1e917 => 0x1e939 // ADLAM CAPITAL LETTER QAAF + case 0x1e918 => 0x1e93a // ADLAM CAPITAL LETTER GA + case 0x1e919 => 0x1e93b // ADLAM CAPITAL LETTER NYA + case 0x1e91a => 0x1e93c // ADLAM CAPITAL LETTER TU + case 0x1e91b => 0x1e93d // ADLAM CAPITAL LETTER NHA + case 0x1e91c => 0x1e93e // ADLAM CAPITAL LETTER VA + case 0x1e91d => 0x1e93f // ADLAM CAPITAL LETTER KHA + case 0x1e91e => 0x1e940 // ADLAM CAPITAL LETTER GBE + case 0x1e91f => 0x1e941 // ADLAM CAPITAL LETTER ZAL + case 0x1e920 => 0x1e942 // ADLAM CAPITAL LETTER KPO + case 0x1e921 => 0x1e943 // ADLAM CAPITAL LETTER SHA case _ => codePoint // All others map to themselves } } diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala new file mode 100644 index 0000000..45fe782 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala @@ -0,0 +1,70 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import java.text.Normalizer +import scala.annotation.tailrec + +final case class CompatibilityFullCaseFoldedString private (override val toString: String) + extends AnyVal + +object CompatibilityFullCaseFoldedString { + + def apply(value: String): CompatibilityFullCaseFoldedString = { + val nfdNormal: String = + if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { + value + } else { + Normalizer.normalize(value, Normalizer.Form.NFD) + } + + val caseFold0: String = + CaseFolding.fullCaseFoldString(nfdNormal) + + val nfkdNormal0: String = + Normalizer.normalize(caseFold0, Normalizer.Form.NFKD) + + val caseFold1: String = + CaseFolding.fullCaseFoldString(nfkdNormal0) + + val nfkdNormal1: String = + Normalizer.normalize(caseFold1, Normalizer.Form.NFKD) + + // scalafmt:off + // + // Yes, you read that right. Round and round we go. + // + // > "Yes, that’s it," said the Hatter with a sigh: + // > "it’s always tea-time, and we’ve no time to wash the things between whiles." + // > + // > "Then you keep moving round, I suppose?" said Alice. + // > + // > "Exactly so," said the Hatter: "as the things get used up." + // > + // > "But what happens when you come to the beginning again?" Alice ventured to ask. 
+    // >
+    // > "Suppose we change the subject," the March Hare interrupted, yawning
+    //
+    // - Alice's Adventures In Wonderland, Chapter VII, by Lewis Carroll
+    //
+    // scalafmt:on
+    new CompatibilityFullCaseFoldedString(nfkdNormal1) // `new` on purpose: a bare call here is this very `apply` and would never terminate
+  }
+
+  val empty: CompatibilityFullCaseFoldedString =
+    apply("")
+}
diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala
new file mode 100644
index 0000000..bbf8744
--- /dev/null
+++ b/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2020 Typelevel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.typelevel.ci
+
+import java.text.Normalizer
+import scala.annotation.tailrec
+
+final case class CompatibilitySimpleCaseFoldedString private (override val toString: String)
+    extends AnyVal
+
+object CompatibilitySimpleCaseFoldedString {
+  def apply(value: String): CompatibilitySimpleCaseFoldedString = { // pipeline: NFD -> simple fold -> NFKD -> simple fold -> NFKD
+    val nfdNormal: String =
+      if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) {
+        value // already NFD, reuse the input as-is
+      } else {
+        Normalizer.normalize(value, Normalizer.Form.NFD)
+      }
+
+    val caseFold0: String =
+      CaseFolding.simpleCaseFoldString(nfdNormal)
+
+    val nfkdNormal0: String =
+      Normalizer.normalize(caseFold0, Normalizer.Form.NFKD)
+
+    val caseFold1: String =
+      CaseFolding.simpleCaseFoldString(nfkdNormal0)
+
+    val nfkdNormal1: String =
+      Normalizer.normalize(caseFold1, Normalizer.Form.NFKD)
+
+    new CompatibilitySimpleCaseFoldedString(nfkdNormal1) // `new` on purpose: a bare call here is this very `apply` and would never terminate
+  }
+
+  val empty: CompatibilitySimpleCaseFoldedString =
+    apply("")
+}
diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala
new file mode 100644
index 0000000..e1cc63f
--- /dev/null
+++ b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2020 Typelevel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.typelevel.ci
+
+import java.text.Normalizer
+import scala.annotation.tailrec
+
+final case class CompatibilityTurkicFullCaseFoldedString private (override val toString: String)
+    extends AnyVal
+
+object CompatibilityTurkicFullCaseFoldedString {
+
+  def apply(value: String): CompatibilityTurkicFullCaseFoldedString = { // pipeline: NFD -> Turkic full fold -> NFKD -> Turkic full fold -> NFKD
+    val nfdNormal: String =
+      if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) {
+        value // already NFD, reuse the input as-is
+      } else {
+        Normalizer.normalize(value, Normalizer.Form.NFD)
+      }
+
+    val caseFold0: String =
+      CaseFolding.turkicFullCaseFoldString(nfdNormal)
+
+    val nfkdNormal0: String =
+      Normalizer.normalize(caseFold0, Normalizer.Form.NFKD)
+
+    val caseFold1: String =
+      CaseFolding.turkicFullCaseFoldString(nfkdNormal0)
+
+    val nfkdNormal1: String =
+      Normalizer.normalize(caseFold1, Normalizer.Form.NFKD)
+
+    new CompatibilityTurkicFullCaseFoldedString(nfkdNormal1) // `new` on purpose: a bare call here is this very `apply` and would never terminate
+  }
+
+  val empty: CompatibilityTurkicFullCaseFoldedString =
+    apply("")
+}
diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala
new file mode 100644
index 0000000..6c3be24
--- /dev/null
+++ b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2020 Typelevel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.typelevel.ci
+
+import java.text.Normalizer
+import scala.annotation.tailrec
+
+final case class CompatibilityTurkicSimpleCaseFoldedString private (override val toString: String)
+    extends AnyVal
+
+object CompatibilityTurkicSimpleCaseFoldedString {
+
+  def apply(value: String): CompatibilityTurkicSimpleCaseFoldedString = { // pipeline: NFD -> Turkic simple fold -> NFKD -> Turkic simple fold -> NFKD
+    val nfdNormal: String =
+      if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) {
+        value // already NFD, reuse the input as-is
+      } else {
+        Normalizer.normalize(value, Normalizer.Form.NFD)
+      }
+
+    val caseFold0: String =
+      CaseFolding.turkicSimpleCaseFoldString(nfdNormal)
+
+    val nfkdNormal0: String =
+      Normalizer.normalize(caseFold0, Normalizer.Form.NFKD)
+
+    val caseFold1: String =
+      CaseFolding.turkicSimpleCaseFoldString(nfkdNormal0)
+
+    val nfkdNormal1: String =
+      Normalizer.normalize(caseFold1, Normalizer.Form.NFKD)
+
+    new CompatibilityTurkicSimpleCaseFoldedString(nfkdNormal1) // `new` on purpose: a bare call here is this very `apply` and would never terminate
+  }
+
+  val empty: CompatibilityTurkicSimpleCaseFoldedString =
+    apply("")
+}
diff --git a/core/src/main/scala/org/typelevel/ci/FullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/FullCaseFoldedString.scala
new file mode 100644
index 0000000..d084d88
--- /dev/null
+++ b/core/src/main/scala/org/typelevel/ci/FullCaseFoldedString.scala
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2020 Typelevel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.typelevel.ci + +import cats._ +import cats.kernel._ +import scala.annotation.tailrec + +final case class FullCaseFoldedString private (override val toString: String) extends AnyVal { + private final def copy(toString: String = toString): FullCaseFoldedString = + FullCaseFoldedString(toString) +} + +object FullCaseFoldedString { + def apply(value: String): FullCaseFoldedString = + new FullCaseFoldedString(CaseFolding.fullCaseFoldString(value)) + + val empty: FullCaseFoldedString = + apply("") +} diff --git a/core/src/main/scala/org/typelevel/ci/SimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/SimpleCaseFoldedString.scala new file mode 100644 index 0000000..7176247 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/SimpleCaseFoldedString.scala @@ -0,0 +1,73 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.typelevel.ci + +import cats._ +import cats.kernel._ +import cats.syntax.all._ +import scala.annotation.tailrec + +final case class SimpleCaseFoldedString private (override val toString: String) extends AnyVal + +object SimpleCaseFoldedString { + def apply(value: String): SimpleCaseFoldedString = + new SimpleCaseFoldedString(CaseFolding.simpleCaseFoldString(value)) + + val empty: SimpleCaseFoldedString = + apply("") + + implicit val hashAndOrderForSimpleCaseFoldedString + : Hash[SimpleCaseFoldedString] with Order[SimpleCaseFoldedString] = + new Hash[SimpleCaseFoldedString] with Order[SimpleCaseFoldedString] { + override def hash(x: SimpleCaseFoldedString): Int = + x.hashCode + + override def compare(x: SimpleCaseFoldedString, y: SimpleCaseFoldedString): Int = + x.toString.compare(y.toString) + } + + implicit val orderingForSimpleCaseFoldedString: Ordering[SimpleCaseFoldedString] = + hashAndOrderForSimpleCaseFoldedString.toOrdering + + implicit val showForSimpleCaseFoldedString: Show[SimpleCaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForSimpleCaseFoldedString: LowerBounded[SimpleCaseFoldedString] = + new LowerBounded[SimpleCaseFoldedString] { + override val partialOrder: PartialOrder[SimpleCaseFoldedString] = + hashAndOrderForSimpleCaseFoldedString + + override val minBound: SimpleCaseFoldedString = + empty + } + + implicit val monoidForSimpleCaseFoldedString: Monoid[SimpleCaseFoldedString] = + new Monoid[SimpleCaseFoldedString] { + override val empty: SimpleCaseFoldedString = SimpleCaseFoldedString.empty + + override def combine( + x: SimpleCaseFoldedString, + y: SimpleCaseFoldedString): SimpleCaseFoldedString = + SimpleCaseFoldedString(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[SimpleCaseFoldedString]): SimpleCaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + SimpleCaseFoldedString(sb.toString) + } + } +} diff --git 
a/core/src/main/scala/org/typelevel/ci/TurkicFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/TurkicFullCaseFoldedString.scala new file mode 100644 index 0000000..6378953 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/TurkicFullCaseFoldedString.scala @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.typelevel.ci + +import scala.annotation.tailrec + +final case class TurkicFullCaseFoldedString private (override val toString: String) extends AnyVal + +object TurkicFullCaseFoldedString { + def apply(value: String): TurkicFullCaseFoldedString = + new TurkicFullCaseFoldedString(CaseFolding.turkicFullCaseFoldString(value)) + + val empty: TurkicFullCaseFoldedString = + apply("") +} diff --git a/core/src/main/scala/org/typelevel/ci/TurkicSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/TurkicSimpleCaseFoldedString.scala new file mode 100644 index 0000000..2359737 --- /dev/null +++ b/core/src/main/scala/org/typelevel/ci/TurkicSimpleCaseFoldedString.scala @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Typelevel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.typelevel.ci + +import scala.annotation.tailrec + +final case class TurkicSimpleCaseFoldedString private (override val toString: String) extends AnyVal + +object TurkicSimpleCaseFoldedString { + def apply(value: String): TurkicSimpleCaseFoldedString = + new TurkicSimpleCaseFoldedString(CaseFolding.turkicSimpleCaseFoldString(value)) + + val empty: TurkicSimpleCaseFoldedString = + apply("") +} diff --git a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala index 288e9aa..300b4bf 100644 --- a/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala +++ b/testing/src/main/scala/org/typelevel/ci/testing/arbitraries.scala @@ -41,9 +41,7 @@ object arbitraries { val surrogatePairStrings: Gen[String] = // Any Unicode codepoint >= 0x10000 is represented on the JVM by a // surrogate pair of two character values. 
- Gen.choose(0x10000, 0x10ffff).map(codePoint => - new String(Array(codePoint), 0, 1) - ) + Gen.choose(0x10000, 0x10ffff).map(codePoint => new String(Array(codePoint), 0, 1)) val titleCaseStrings: Gen[String] = { @tailrec @@ -73,36 +71,9 @@ object arbitraries { implicit val shrinkForCIString: Shrink[CIString] = { val stringShrink: Shrink[String] = implicitly[Shrink[String]] - Shrink( - x => stringShrink.shrink(x.toString).map(CIString.apply) - ) + Shrink(x => stringShrink.shrink(x.toString).map(CIString.apply)) } - implicit val cogenForOrgTypelevelCiCIString: Cogen[CIString] = Cogen[String].contramap(ci => new String(ci.toString.toArray.map(_.toLower))) - - implicit val arbCaseFoldedString: Arbitrary[CaseFoldedString] = - Arbitrary( - arbitrary[String].flatMap(value => - Gen.oneOf( - CaseFoldedString(value), - CaseFoldedString(value, true) // Turkic folding rules - ) - ) - ) - - implicit val cogenForCaseFoldedString: Cogen[CaseFoldedString] = - Cogen[String].contramap(_.toString) - - @nowarn("cat=deprecation") - implicit val shrinkCaseFoldedString: Shrink[CaseFoldedString] = { - import scala.collection.immutable.Stream - val stringShrink: Shrink[String] = implicitly[Shrink[String]] - Shrink( - x => stringShrink.shrink(x.toString).flatMap(value => - Stream(CaseFoldedString(value), CaseFoldedString(value, true)) - ) - ) - } } diff --git a/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala b/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala deleted file mode 100644 index 6aa2018..0000000 --- a/tests/jvm/src/test/scala/org/typelevel/ci/CaseFoldedStringJVMSuite.scala +++ /dev/null @@ -1,25 +0,0 @@ -package org.typelevel.ci - -import cats.implicits._ -import java.io._ -import munit.ScalaCheckSuite -import org.typelevel.ci.testing.arbitraries._ -import org.scalacheck.Prop._ - -final class CaseFoldedStringJVMSuite extends ScalaCheckSuite { - property("serialization round trips") { - def roundTrip[A](x: A): A = { - val baos = new 
ByteArrayOutputStream - val oos = new ObjectOutputStream(baos) - oos.writeObject(x) - oos.close() - val bais = new ByteArrayInputStream(baos.toByteArray) - val ois = new ObjectInputStream(bais) - ois.readObject().asInstanceOf[A] - } - - forAll { (x: CaseFoldedString) => - x.eqv(roundTrip(x)) - } - } -} diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala index fd8cae2..7c37258 100644 --- a/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala +++ b/tests/shared/src/test/scala/org/typelevel/ci/CIStringSuite.scala @@ -190,7 +190,8 @@ class CIStringSuite extends DisciplineSuite { } // Test name copied from java.lang.Character.getName(), I know it's long... - test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and loser case invocations"){ + test( + "GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and loser case invocations") { val codePoint: Int = 8085 // Unicode codepoint of lower case value val lower: String = (new String(Character.toChars(codePoint))).toLowerCase val upper: String = lower.toUpperCase diff --git a/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala b/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala deleted file mode 100644 index 7d70aad..0000000 --- a/tests/shared/src/test/scala/org/typelevel/ci/CaseFoldedStringSuite.scala +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright 2020 Typelevel - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.typelevel.ci - -import cats.implicits._ -import cats.kernel.laws.discipline._ -import munit.DisciplineSuite -import org.scalacheck.Prop._ -import org.typelevel.ci.testing.arbitraries._ -import scala.math.signum -import scala.annotation.tailrec - -final class CaseFoldedStringSuite extends DisciplineSuite { - property("case insensitive equality") { - forAll { (x: CaseFoldedString) => - if (x.toString.contains('\u0131')) { - // '\u0131' is LATIN SMALL LETTER DOTLESS I The .toUpper on this - // character will yield a 'I', but the Unicode standard for case - // folding states \u0131 is only case insensitively equivalent to 'I' - // for Turkic languages and by default this mapping should not be - // used. 
- val y = CaseFoldedString(x.toString.toLowerCase()) - val z = CaseFoldedString(x.toString.toUpperCase()) - assertNotEquals(y, z) - } else { - val y = CaseFoldedString(x.toString.toLowerCase()) - val z = CaseFoldedString(x.toString.toUpperCase()) - val t = CaseFoldedString(CaseFoldedStringSuite.toTitleCase(x.toString)) - assertEquals(y, z) - assertEquals(y, t) - assertEquals(t, z) - } - } - } - - test("case insensitive comparison") { - assert(CaseFoldedString("case-insensitive") < CaseFoldedString("CI")) - } - - property("reflexive comparison") { - forAll { (x: CaseFoldedString) => - assertEquals(x.compare(x), 0) - } - } - - property("equality consistent with comparison") { - forAll { (x: CaseFoldedString, y: CaseFoldedString) => - assertEquals((x == y), (x.compare(y) == 0)) - } - } - - property("hashCode consistent with equality") { - forAll { (x: CaseFoldedString, y: CaseFoldedString) => - assert((x != y) || (x.hashCode == y.hashCode)) - } - } - - test("isEmpty is true given an empty string") { - assert(CaseFoldedString("").isEmpty) - } - - test("isEmpty is false given a non-empty string") { - assert(!CaseFoldedString("non-empty string").isEmpty) - } - - property("is never equal to .nonEmpty for any given string") { - forAll { (ci: CaseFoldedString) => - assert(ci.isEmpty != ci.nonEmpty) - } - } - - test("nonEmpty is true given a non-empty string") { - assert(CaseFoldedString("non-empty string").nonEmpty) - } - - test("nonEmpty is false given an empty string") { - assert(!CaseFoldedString("").nonEmpty) - } - - test("trim removes leading whitespace") { - assert(CaseFoldedString(" text").trim == CaseFoldedString("text")) - } - - test("removes trailing whitespace") { - assert(CaseFoldedString("text ").trim == CaseFoldedString("text")) - } - - test("removes leading and trailing whitespace") { - assert(CaseFoldedString(" text ").trim == CaseFoldedString("text")) - } - - // property("ci interpolator is consistent with apply") { - // forAll { (s: String) => - // 
assertEquals(ci"$s", CaseFoldedString(s)) - // } - // } - - // property("ci interpolator handles expressions") { - // forAll { (x: Int, y: Int) => - // assertEquals(ci"${x + y}", CaseFoldedString((x + y).toString)) - // } - // } - - // property("ci interpolator handles multiple parts") { - // forAll { (a: String, b: String, c: String) => - // assertEquals(ci"$a:$b:$c", CaseFoldedString(s"$a:$b:$c")) - // } - // } - - // property("ci interpolator extractor is case-insensitive") { - // forAll { (s: String) => - // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toUpper))) match { - // case ci"${t}" => t == CaseFoldedString(s) - // case _ => false - // }) - - // assert(CaseFoldedString(new String(s.toString.toArray.map(_.toLower))) match { - // case ci"${t}" => t == CaseFoldedString(s) - // case _ => false - // }) - // } - // } - - // test("ci interpolator extracts multiple parts") { - // assert(CaseFoldedString("Hello, Aretha") match { - // case ci"${greeting}, ${name}" => greeting == ci"Hello" && name == ci"Aretha" - // }) - // } - - // test("ci interpolator matches literals") { - // assert(CaseFoldedString("literally") match { - // case ci"LiTeRaLlY" => true - // case _ => false - // }) - // } - - // Test name copied from java.lang.Character.getName(), I know it's long... 
- test("GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI should compare equal with upper and loser case invocations"){ - val codePoint: Int = 8085 // Unicode codepoint of lower case value - val lower: String = (new String(Character.toChars(codePoint))).toLowerCase - val upper: String = lower.toUpperCase - val title: String = lower.map(c => Character.toTitleCase(c)).mkString - assertEquals(CaseFoldedString(lower), CaseFoldedString(upper)) - assertEquals(CaseFoldedString(lower), CaseFoldedString(title)) - assertEquals(CaseFoldedString(title), CaseFoldedString(upper)) - } - - checkAll("Order[CaseFoldedString]", OrderTests[CaseFoldedString].order) - checkAll("Hash[CaseFoldedString]", HashTests[CaseFoldedString].hash) - checkAll("LowerBounded[CaseFoldedString]", LowerBoundedTests[CaseFoldedString].lowerBounded) - checkAll("Monoid[CaseFoldedString]", MonoidTests[CaseFoldedString].monoid) -} - -object CaseFoldedStringSuite { - def mapStringByCodepoint(f: Int => Int)(s: String): String = { - // Scala's wrapper class doesn't support appendCodePoint, so we need to - // explicitly use the java.lang.StringBuilder - val builder: java.lang.StringBuilder = new java.lang.StringBuilder(s.length) - - @tailrec - def loop(index: Int): String = - if (index >= s.length) { - builder.toString - } else { - val codePoint: Int = s.codePointAt(index) - builder.appendCodePoint(f(codePoint)) - val inc: Int = Character.charCount(codePoint) - loop(index + inc) - } - - loop(0) - } - - def toTitleCase(s: String): String = - mapStringByCodepoint(Character.toTitleCase)(s) -} From 3bc849d4da4629b7e657ed0712d1f76c86838464 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 6 Feb 2022 16:45:49 -0700 Subject: [PATCH 08/10] Add Missing `new` Keywords --- .../org/typelevel/ci/CompatibilityFullCaseFoldedString.scala | 2 +- .../org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala | 2 +- .../typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala | 2 +- 
.../ci/CompatibilityTurkicSimpleCaseFoldedString.scala | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala index 45fe782..f231207 100644 --- a/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CompatibilityFullCaseFoldedString.scala @@ -62,7 +62,7 @@ object CompatibilityFullCaseFoldedString { // - Alice's Adventures In Wonderland, Chapter VII, by Lewis Carroll // // scalafmt:on - CompatibilityFullCaseFoldedString(nfkdNormal1) + new CompatibilityFullCaseFoldedString(nfkdNormal1) } val empty: CompatibilityFullCaseFoldedString = diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala index bbf8744..c2992fa 100644 --- a/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CompatibilitySimpleCaseFoldedString.scala @@ -43,7 +43,7 @@ object CompatibilitySimpleCaseFoldedString { val nfkdNormal1: String = Normalizer.normalize(caseFold1, Normalizer.Form.NFKD) - CompatibilitySimpleCaseFoldedString(nfkdNormal1) + new CompatibilitySimpleCaseFoldedString(nfkdNormal1) } val empty: CompatibilitySimpleCaseFoldedString = diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala index e1cc63f..43de42e 100644 --- a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicFullCaseFoldedString.scala @@ -44,7 +44,7 @@ object CompatibilityTurkicFullCaseFoldedString { val nfkdNormal1: String = Normalizer.normalize(caseFold1, Normalizer.Form.NFKD) - 
CompatibilityTurkicFullCaseFoldedString(nfkdNormal1) + new CompatibilityTurkicFullCaseFoldedString(nfkdNormal1) } val empty: CompatibilityTurkicFullCaseFoldedString = diff --git a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala index 6c3be24..cbaf670 100644 --- a/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CompatibilityTurkicSimpleCaseFoldedString.scala @@ -44,7 +44,7 @@ object CompatibilityTurkicSimpleCaseFoldedString { val nfkdNormal1: String = Normalizer.normalize(caseFold1, Normalizer.Form.NFKD) - CompatibilityTurkicSimpleCaseFoldedString(nfkdNormal1) + new CompatibilityTurkicSimpleCaseFoldedString(nfkdNormal1) } val empty: CompatibilityTurkicSimpleCaseFoldedString = From 619b36ff58abd5c318c59475e13c7ce46c7060f5 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Mon, 7 Feb 2022 07:35:34 -0700 Subject: [PATCH 09/10] Define CIString To Be A CanonicalFullCaseFoldedString This allows us to not deprecate the type. There is still an unfortunate issue here. Full case folding can change the length (in terms of characters) of the `String`. This makes `CIString.length` not safe to use, as it is not clear which length it now refers to. This commit deprecates that method because of this. However, the `glob` based `unapplySeq` relies both on `length` and `Char` based case insensitivity. This means that it is no longer correct. That said, by relying on `Char` based case insensitivity, it was probably not quite correct to begin with. 
--- .../scala/org/typelevel/ci/CIString.scala | 76 +++++++++++---- .../scala/org/typelevel/ci/CIStringCF.scala | 41 -------- .../ci/CanonicalFullCaseFoldedString.scala | 96 ++++++++++++++++++- 3 files changed, 150 insertions(+), 63 deletions(-) delete mode 100644 core/src/main/scala/org/typelevel/ci/CIStringCF.scala diff --git a/core/src/main/scala/org/typelevel/ci/CIString.scala b/core/src/main/scala/org/typelevel/ci/CIString.scala index f930b25..f1489a7 100644 --- a/core/src/main/scala/org/typelevel/ci/CIString.scala +++ b/core/src/main/scala/org/typelevel/ci/CIString.scala @@ -22,30 +22,68 @@ import java.io.Serializable import org.typelevel.ci.compat._ import scala.math.Ordered -@deprecated( - message = - "Please use either CIStringCF, CIStringCS, or CIStringS instead. CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", - since = "1.3.0") -final class CIString private (override val toString: String, val asCIStringS: CIStringS) +/** A case insensitive representation of a `String`. + * + * There are several different ways to define a case insensitive match with Unicode. According to + * the Unicode standard, this is the "most correct" definition. If you are just looking for a case + * insensitive `String`, you should either use this or [[CanonicalFullCaseFoldedString]]. + * + * The only difference is whether or not you want to keep track of the original input `String` + * value. If you don't care about that, then [[CanonicalFullCaseFoldedString]] uses less memory and + * is likely ''slightly'' faster for most operations. 
+ * + * {{{ + * scala> CIString("ß") + * val res0: org.typelevel.ci.CIString = ß + * + * scala> CanonicalFullCaseFoldedString("ß") + * val res1: org.typelevel.ci.CanonicalFullCaseFoldedString = ss + * + * scala> res0.asCanonicalFullCaseFoldedString == res1 + * val res2: Boolean = true + * + * scala> res0.toString + * val res3: String = ß + * + * scala> res1.toString + * val res4: String = ss + * + * scala> res0.asCanonicalFullCaseFoldedString.toString + * val res5: String = ss + * }}} + * + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] + */ +final class CIString private (override val toString: String) extends Ordered[CIString] with Serializable { - @deprecated(message = "Please provide a CaseFoldedString directly.", since = "1.3.0") - private def this(toString: String) = - this(toString, CIStringS(toString)) + /** The [[CanonicalFullCaseFoldedString]] representation of this `String`. + * + * This is the input `String`, case folded using full Unicode case folding (without the Turkic + * rules), and normalized for Unicode canonical caseless matching. + * + * For any two given Unicode text values, they are considered canonically caseless equivalent to + * each other if they both result in this [[CanonicalFullCaseFoldedString]].
+ */ + lazy val asCanonicalFullCaseFoldedString: CanonicalFullCaseFoldedString = + CanonicalFullCaseFoldedString(this.toString) override def equals(that: Any): Boolean = that match { case that: CIString => - this.asCIStringS == that.asCIStringS + asCanonicalFullCaseFoldedString == that.asCanonicalFullCaseFoldedString case _ => false } override def hashCode(): Int = - this.asCIStringS.hashCode + this.asCanonicalFullCaseFoldedString.hashCode override def compare(that: CIString): Int = - Order[CIStringS].compare(asCIStringS, that.asCIStringS) + Order[CanonicalFullCaseFoldedString].compare( + asCanonicalFullCaseFoldedString, + that.asCanonicalFullCaseFoldedString) def transform(f: String => String): CIString = CIString(f(toString)) @@ -55,7 +93,11 @@ final class CIString private (override val toString: String, val asCIStringS: CI def trim: CIString = transform(_.trim) - def length: Int = toString.length + @deprecated( + message = + "Please use asCanonicalFullCaseFoldedString.length or toString.length, depending on your use case, instead. CIString represents a Unicode canonical caseless string with full case folding. Full case folding can change the length (in terms of number of Char values) of a String. This makes length on CIString confusing to use because it is unclear which length this method refers to. As 1.3.0 it is defined to refer to the length of the full case folded representation of the String, since this will be the same for all input Strings.", + since = "1.3.0") + def length: Int = asCanonicalFullCaseFoldedString.toString.length @deprecated("Use toString", "0.1.0") def value: String = toString @@ -64,17 +106,9 @@ final class CIString private (override val toString: String, val asCIStringS: CI @suppressUnusedImportWarningForCompat object CIString { - @deprecated( - message = - "Please use either CIStringCF, CIStringCS, or CIStringS instead. CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. 
For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", - since = "1.3.0") def apply(value: String): CIString = - new CIString(value, CIStringS(value)) + new CIString(value) - @deprecated( - message = - "Please use either CIStringCF, CIStringCS, or CIStringS instead. CIString/CIStringS implement Unicode default caseless matching on simple case folded strings. For most applications you probably want to use CIStringCF which implements Unicode canonical caseless matching on full case folded strings.", - since = "1.3.0") val empty = CIString("") implicit val catsInstancesForOrgTypelevelCIString: Order[CIString] diff --git a/core/src/main/scala/org/typelevel/ci/CIStringCF.scala b/core/src/main/scala/org/typelevel/ci/CIStringCF.scala deleted file mode 100644 index 957013c..0000000 --- a/core/src/main/scala/org/typelevel/ci/CIStringCF.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2020 Typelevel - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.typelevel.ci - -final class CIStringCF private ( - override val toString: String, - val asCanonicalFullCaseFoldedString: CanonicalFullCaseFoldedString) - extends Serializable { - override def equals(that: Any): Boolean = - that match { - case that: CIStringCF => - asCanonicalFullCaseFoldedString == that.asCanonicalFullCaseFoldedString - case _ => - false - } - - override def hashCode(): Int = - asCanonicalFullCaseFoldedString.hashCode -} - -object CIStringCF { - def apply(value: String): CIStringCF = - new CIStringCF( - value, - CanonicalFullCaseFoldedString(value) - ) -} diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala index 218365a..3234faa 100644 --- a/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala @@ -16,9 +16,50 @@ package org.typelevel.ci +import cats._ +import cats.kernel._ +import cats.syntax.all._ import java.text.Normalizer -import scala.annotation.tailrec +/** A caseless `String`, normalized using Unicode canonical caseless matching + * and full case folding. According to the Unicode standard this is the "most + * correct" method of caseless matching. If you are looking for a + * caseless/case insensitive `String` and have no other requirements, you + * should either use this or [[CIString]]. The difference between the two is + * that [[CIString]] keeps a reference to the original input `String` (before + * normalization and case folding), and this type does not. If you don't need + * the original input `String` value, just a caseless version of it, this + * type will be more efficient in terms of computation and memory for + * ''most'' applications. + * + * "Canonical" has a specific meaning in Unicode. 
From the standard, + * + * {{{ + * Canonical equivalence is a fundamental equivalency between characters + * or sequences of characters which represent the same abstract character, + * and which when correctly displayed should always have the same visual + * appearance and behavior. + * }}} + * + * The definition of canonical caseless equivalence in Unicode is, + * + * {{{ + * D145 A string X is a canonical caseless match for a string Y if and only if: + * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y ))) + * }}} + * + * Where "NFD" is the function which performs "Canonical Decomposition" and + * "toCasefold" is one, of several, case folding operations. This type uses + * full case folding, without the special rules for some Turkic languages. + * + * Thus, the `String` in this type is the result of applying + * `NFD(toCasefold(NFD(X)))` to the input `String`, `X`. + * + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] + * @see [[https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence Canonical Equivalence]] + * @see [[https://www.unicode.org/reports/tr15/#Norm_Forms Unicode Normal Forms]] + */ final case class CanonicalFullCaseFoldedString private (override val toString: String) extends AnyVal @@ -26,6 +67,14 @@ object CanonicalFullCaseFoldedString { def apply(value: String): CanonicalFullCaseFoldedString = new CanonicalFullCaseFoldedString( Normalizer.normalize( + // Note, the first application of NFD prescribed by the Unicode + // standard is to handle some very rare edge cases which can change + // the result even after applying the second, outer, NFD application + // after case folding. These edge cases are so rare that the standard + // recommends checking to see if the given input string is in normal + // form first, as that will likely be cheaper than normalizing. + // + // However, we always have to normalize _after_ case folding. 
if (Normalizer.isNormalized(value, Normalizer.Form.NFD)) { CaseFolding.fullCaseFoldString(value) } else { @@ -37,4 +86,49 @@ object CanonicalFullCaseFoldedString { val empty: CanonicalFullCaseFoldedString = apply("") + + implicit val hashAndOrderForCanonicalFullCaseFoldedString + : Hash[CanonicalFullCaseFoldedString] with Order[CanonicalFullCaseFoldedString] = + new Hash[CanonicalFullCaseFoldedString] with Order[CanonicalFullCaseFoldedString] { + override def hash(x: CanonicalFullCaseFoldedString): Int = + x.hashCode + + override def compare( + x: CanonicalFullCaseFoldedString, + y: CanonicalFullCaseFoldedString): Int = + x.toString.compare(y.toString) + } + + implicit val orderingForCanonicalFullCaseFoldedString: Ordering[CanonicalFullCaseFoldedString] = + hashAndOrderForCanonicalFullCaseFoldedString.toOrdering + + implicit val showForCanonicalFullCaseFoldedString: Show[CanonicalFullCaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForCanonicalFullCaseFoldedString + : LowerBounded[CanonicalFullCaseFoldedString] = + new LowerBounded[CanonicalFullCaseFoldedString] { + override val partialOrder: PartialOrder[CanonicalFullCaseFoldedString] = + hashAndOrderForCanonicalFullCaseFoldedString + + override val minBound: CanonicalFullCaseFoldedString = + empty + } + + implicit val monoidForCanonicalFullCaseFoldedString: Monoid[CanonicalFullCaseFoldedString] = + new Monoid[CanonicalFullCaseFoldedString] { + override val empty: CanonicalFullCaseFoldedString = CanonicalFullCaseFoldedString.empty + + override def combine( + x: CanonicalFullCaseFoldedString, + y: CanonicalFullCaseFoldedString): CanonicalFullCaseFoldedString = + CanonicalFullCaseFoldedString(x.toString + y.toString) + + override def combineAll( + xs: IterableOnce[CanonicalFullCaseFoldedString]): CanonicalFullCaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + CanonicalFullCaseFoldedString(sb.toString) + } + } } From 
47e5980750d0f674ca59fd36cdfe7366950b7423 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Fri, 27 May 2022 12:43:29 -0600 Subject: [PATCH 10/10] Add More Docs --- .../ci/CanonicalFullCaseFoldedString.scala | 35 ++++++------ .../ci/CanonicalSimpleCaseFoldedString.scala | 37 ++++++++++++ .../CanonicalTurkicFullCaseFoldedString.scala | 57 +++++++++++++++++++ ...anonicalTurkicSimpleCaseFoldedString.scala | 6 ++ .../scala/org/typelevel/ci/CaseFolding.scala | 56 ++++++++++++++++++ 5 files changed, 174 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala index 3234faa..47abcdc 100644 --- a/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CanonicalFullCaseFoldedString.scala @@ -21,16 +21,15 @@ import cats.kernel._ import cats.syntax.all._ import java.text.Normalizer -/** A caseless `String`, normalized using Unicode canonical caseless matching - * and full case folding. According to the Unicode standard this is the "most - * correct" method of caseless matching. If you are looking for a - * caseless/case insensitive `String` and have no other requirements, you - * should either use this or [[CIString]]. The difference between the two is - * that [[CIString]] keeps a reference to the original input `String` (before - * normalization and case folding), and this type does not. If you don't need - * the original input `String` value, just a caseless version of it, this - * type will be more efficient in terms of computation and memory for - * ''most'' applications. +/** A caseless `String`, normalized using Unicode canonical caseless matching and full case folding. + * According to the Unicode standard this is the "most correct" method of caseless matching. 
If you + * are looking for a caseless/case insensitive `String` and have no other requirements, you should + * either use this or [[CIString]]. The difference between the two is that [[CIString]] keeps a + * reference to the original input `String` (before normalization and case folding), and this type + * does not. If you don't need the original input `String` value, just a caseless version of it, + * this type will be more efficient in terms of computation and memory for ''most'' applications. + * + * "Full" case folding can change the number of code points in the input string. * * "Canonical" has a specific meaning in Unicode. From the standard, * @@ -48,17 +47,19 @@ import java.text.Normalizer * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y ))) * }}} * - * Where "NFD" is the function which performs "Canonical Decomposition" and - * "toCasefold" is one, of several, case folding operations. This type uses - * full case folding, without the special rules for some Turkic languages. + * Where "NFD" is the function which performs "Canonical Decomposition" and "toCasefold" is one, of + * several, case folding operations. This type uses full case folding, without the special rules + * for some Turkic languages. * - * Thus, the `String` in this type is the result of applying - * `NFD(toCasefold(NFD(X)))` to the input `String`, `X`. + * Thus, the `String` in this type is the result of applying `NFD(toCasefold(NFD(X)))` to the input + * `String`, `X`. 
* * @see * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] - * @see [[https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence Canonical Equivalence]] - * @see [[https://www.unicode.org/reports/tr15/#Norm_Forms Unicode Normal Forms]] + * @see + * [[https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence Canonical Equivalence]] + * @see + * [[https://www.unicode.org/reports/tr15/#Norm_Forms Unicode Normal Forms]] */ final case class CanonicalFullCaseFoldedString private (override val toString: String) extends AnyVal diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala index b048100..b1baa24 100644 --- a/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CanonicalSimpleCaseFoldedString.scala @@ -22,6 +22,43 @@ import cats.syntax.all._ import java.text.Normalizer import scala.annotation.tailrec +/** A caseless `String`, normalized using Unicode canonical caseless matching and simple case + * folding. This differs from [[CanonicalFullCaseFoldedString]] in that it uses "simple" case + * folding. + * + * "Simple" case folding does not change the number of code points in the input string, unlike + * "Full" case folding which can. + * + * "Canonical" has a specific meaning in Unicode. From the standard, + * + * {{{ + * Canonical equivalence is a fundamental equivalency between characters + * or sequences of characters which represent the same abstract character, + * and which when correctly displayed should always have the same visual + * appearance and behavior. 
+ * }}} + * + * The definition of canonical caseless equivalence in Unicode is, + * + * {{{ + * D145 A string X is a canonical caseless match for a string Y if and only if: + * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y ))) + * }}} + * + * Where "NFD" is the function which performs "Canonical Decomposition" and "toCasefold" is one, of + * several, case folding operations. This type uses simple case folding, without the special rules + * for some Turkic languages. + * + * Thus, the `String` in this type is the result of applying `NFD(toCasefold(NFD(X)))` to the input + * `String`, `X`. + * + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] + * @see + * [[https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence Canonical Equivalence]] + * @see + * [[https://www.unicode.org/reports/tr15/#Norm_Forms Unicode Normal Forms]] + */ final case class CanonicalSimpleCaseFoldedString private (override val toString: String) extends AnyVal diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala index 8b6b9d3..cb77e6f 100644 --- a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicFullCaseFoldedString.scala @@ -16,9 +16,17 @@ package org.typelevel.ci +import cats._ +import cats.kernel._ import java.text.Normalizer import scala.annotation.tailrec +/** As [[CanonicalFullCaseFoldedString]], except it uses the special rules for certain Turkic + * languages during the case folding step. 
+ * + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] + */ final case class CanonicalTurkicFullCaseFoldedString private (override val toString: String) extends AnyVal @@ -37,4 +45,53 @@ object CanonicalTurkicFullCaseFoldedString { val empty: CanonicalTurkicFullCaseFoldedString = apply("") + + implicit val hashAndOrderForCanonicalTurkicFullCaseFoldedString + : Hash[CanonicalTurkicFullCaseFoldedString] with Order[CanonicalTurkicFullCaseFoldedString] = + new Hash[CanonicalTurkicFullCaseFoldedString] with Order[CanonicalTurkicFullCaseFoldedString] { + override def hash(x: CanonicalTurkicFullCaseFoldedString): Int = + x.hashCode + + override def compare( + x: CanonicalTurkicFullCaseFoldedString, + y: CanonicalTurkicFullCaseFoldedString): Int = + x.toString.compare(y.toString) + } + + implicit val orderingForCanonicalTurkicFullCaseFoldedString + : Ordering[CanonicalTurkicFullCaseFoldedString] = + hashAndOrderForCanonicalTurkicFullCaseFoldedString.toOrdering + + implicit val showForCanonicalTurkicFullCaseFoldedString + : Show[CanonicalTurkicFullCaseFoldedString] = + Show.fromToString + + implicit val lowerBoundForCanonicalTurkicFullCaseFoldedString + : LowerBounded[CanonicalTurkicFullCaseFoldedString] = + new LowerBounded[CanonicalTurkicFullCaseFoldedString] { + override val partialOrder: PartialOrder[CanonicalTurkicFullCaseFoldedString] = + hashAndOrderForCanonicalTurkicFullCaseFoldedString + + override val minBound: CanonicalTurkicFullCaseFoldedString = + empty + } + + implicit val monoidForCanonicalTurkicFullCaseFoldedString + : Monoid[CanonicalTurkicFullCaseFoldedString] = + new Monoid[CanonicalTurkicFullCaseFoldedString] { + override val empty: CanonicalTurkicFullCaseFoldedString = + CanonicalTurkicFullCaseFoldedString.empty + + override def combine( + x: CanonicalTurkicFullCaseFoldedString, + y: CanonicalTurkicFullCaseFoldedString): CanonicalTurkicFullCaseFoldedString = + 
CanonicalTurkicFullCaseFoldedString(x.toString + y.toString) + + override def combineAll(xs: IterableOnce[CanonicalTurkicFullCaseFoldedString]) + : CanonicalTurkicFullCaseFoldedString = { + val sb: StringBuilder = new StringBuilder + xs.iterator.foreach(cfs => sb.append(cfs.toString)) + CanonicalTurkicFullCaseFoldedString(sb.toString) + } + } } diff --git a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala index cf0d00b..e3c13b9 100644 --- a/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala +++ b/core/src/main/scala/org/typelevel/ci/CanonicalTurkicSimpleCaseFoldedString.scala @@ -19,6 +19,12 @@ package org.typelevel.ci import java.text.Normalizer import scala.annotation.tailrec +/** As [[CanonicalSimpleCaseFoldedString]], except it uses the special rules for certain Turkic + * languages during the case folding step. + * + * @see + * [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]] + */ final case class CanonicalTurkicSimpleCaseFoldedString private (override val toString: String) extends AnyVal diff --git a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala index 72d1674..5e3a0a7 100644 --- a/core/src/main/scala/org/typelevel/ci/CaseFolding.scala +++ b/core/src/main/scala/org/typelevel/ci/CaseFolding.scala @@ -21,6 +21,20 @@ import scala.annotation.tailrec /** These are lookup tables for case folding. There are several different case folding algorithms * which can be employed with different trade offs. * + * The definition of case folding from the Unicode specification, + * + * {{{ + * Case folding is related to case conversion. However, the main purpose + * of case folding is to contribute to caseless matching of strings, whereas + * the main purpose of case conversion is to put strings into a particular + * cased form. 
+ * }}} + * + * A case folded string is ''not'' a caseless string. The result of case folding a string does not + * in and of itself give a string which is ready to be compared for a caseless match. There are + * several types of caseless matching and for many of them one or more additional transformations are + * required. + * * @note * Some case folding, in particular full case folding, can yield more codePoints than the * original value. That is, it can ''increase'' the size of `String` values once folded. @@ -44,6 +58,13 @@ private[ci] object CaseFolding { // 66: 12263 // }}} + /** Perform "full" case folding as defined in the Unicode Case folding tables. + * + * Full case folded strings can cause the number of code points in the string to change. + * + * @see + * [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ def fullCaseFoldString(value: String): String = { val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) @@ -61,6 +82,14 @@ private[ci] object CaseFolding { loop(0) } + /** Perform "full" case folding as defined in the Unicode Case folding tables, using the special + * rules for Turkic languages. + * + * Full case folded strings can cause the number of code points in the string to change. + * + * @see + * [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ def turkicFullCaseFoldString(value: String): String = { val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) @@ -78,6 +107,19 @@ private[ci] object CaseFolding { loop(0) } + /** Perform "simple" case folding as defined in the Unicode Case folding tables. + * + * Simple case folded strings will have the same number of code points after folding. + * + * @note + * Use of simple case folding is formally less correct than full case folding. It is intended + * only for circumstances where a fixed size of the string is required, e.g.
you are working + * on a fixed size buffer. If that restriction does not apply, full case folding should be + * preferred. + * + * @see + * [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ def simpleCaseFoldString(value: String): String = { val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3) @@ -95,6 +137,20 @@ private[ci] object CaseFolding { loop(0) } + /** Perform "simple" case folding as defined in the Unicode Case folding tables, using the special + * rules for Turkic languages. + * + * Simple case folded strings will have the same number of code points after folding. + * + * @note + * Use of simple case folding is formally less correct than full case folding. It is intended + * only for circumstances where a fixed size of the string is required, e.g. you are working + * on a fixed size buffer. If that restriction does not apply, full case folding should be + * preferred. + * + * @see + * [[https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt Unicode Case Folding Tables]] + */ def turkicSimpleCaseFoldString(value: String): String = { val builder: java.lang.StringBuilder = new java.lang.StringBuilder(value.length * 3)