airlift · wendigo · Feb 5, 2025 · dain · Oct 22, 2025 · dain
diff --git a/src/main/java/io/airlift/slice/SliceUtf8.java b/src/main/java/io/airlift/slice/SliceUtf8.java
@@ -299,6 +299,56 @@ public static Slice toLowerCase(Slice utf8)
         return translateCodePoints(utf8, LOWER_CODE_POINTS);
     }
 
+    public static Slice toTitleCase(Slice utf8)
+    {
+        int length = utf8.length();
+        Slice newUtf8 = Slices.allocate(length); // output starts at the input's length; grown below when needed
+        // title case: upper-case the first code point and each one after whitespace, lower-case the rest
+        int position = 0; // read offset into utf8
+        int upperPosition = 0; // write offset into newUtf8
+        boolean upperNext = true; // set at the start and again after every whitespace code point
+        while (position < length) {
+            int codePoint = tryGetCodePointAt(utf8, position);
+            if (codePoint >= 0) {
+                int upperCodePoint = LOWER_CODE_POINTS[codePoint]; // despite the name, holds whichever mapping applies
+                if (upperNext) {
+                    upperCodePoint = UPPER_CODE_POINTS[codePoint];
+                    upperNext = false;
+                }
+                // NOTE(review): branch above uses the upper-case table, not a title-case mapping — confirm intended
+                if (WHITESPACE_CODE_POINTS[codePoint]) {
+                    upperNext = true;
+                }
+
+                // ask for a larger slice if the mapped code point would end past the input length
+                int nextUpperPosition = upperPosition + lengthOfCodePoint(upperCodePoint);
+                if (nextUpperPosition > length) {
+                    newUtf8 = Slices.ensureSize(newUtf8, nextUpperPosition);
+                }
+
+                // write the mapped code point at the current output offset
+                setCodePointAt(upperCodePoint, newUtf8, upperPosition);
+
+                position += lengthOfCodePoint(codePoint); // input advances by the source code point's length
+                upperPosition = nextUpperPosition; // output advances by the mapped code point's length
+            }
+            else {
+                int skipLength = -codePoint; // negative result: its magnitude is used as a byte count (presumably an invalid sequence)
+
+                // ask for a larger slice if the copied bytes would end past the input length
+                int nextUpperPosition = upperPosition + skipLength;
+                if (nextUpperPosition > length) {
+                    newUtf8 = Slices.ensureSize(newUtf8, nextUpperPosition);
+                }
+                // copy those bytes through unchanged; upperNext is deliberately not touched on this path
+                copyUtf8SequenceUnsafe(utf8, position, newUtf8, upperPosition, skipLength);
+                position += skipLength;
+                upperPosition = nextUpperPosition;
+            }
+        }
+        return newUtf8.slice(0, upperPosition); // only the bytes actually written
+    }
+
     private static Slice translateCodePoints(Slice utf8, int[] codePointTranslationMap)
     {
         int length = utf8.length();

diff --git a/src/test/java/io/airlift/slice/TestSlice.java b/src/test/java/io/airlift/slice/TestSlice.java
@@ -30,6 +30,7 @@
 import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
 import static io.airlift.slice.SizeOf.instanceSize;
 import static io.airlift.slice.SizeOf.sizeOfByteArray;
+import static io.airlift.slice.SliceUtf8.toTitleCase;
 import static io.airlift.slice.Slices.EMPTY_SLICE;
 import static io.airlift.slice.Slices.utf8Slice;
 import static java.lang.Double.doubleToLongBits;
@@ -196,6 +197,17 @@ public void testUtf8Conversion()
         assertThat(utf8Slice(s).toStringUtf8()).isEqualTo(s);
     }
 
+    @Test
+    public void testUtf8TitleCaseConversion()
+    {
+        String s = "apple \u2603 snowman";
+        Slice slice = Slices.copiedBuffer(s, UTF_8);
+        // both ways of building the input must title-case identically; the snowman (U+2603) passes through unchanged
+        assertThat(toTitleCase(utf8Slice(s))).isEqualTo(toTitleCase(slice));
+        assertThat(toTitleCase(slice).toStringUtf8()).isEqualTo("Apple \u2603 Snowman");
+        assertThat(slice.toStringUtf8()).isEqualTo(s); // the slice handed to toTitleCase must be left unmodified
+    }
+
     @SuppressWarnings("CharUsedInArithmeticContext")
     private static void assertToStrings(Slice slice, int index)
     {