KAFKA-17747: [2/N] Add compute topic and group hash #19523
Utils.java

@@ -23,15 +23,24 @@
import org.apache.kafka.common.message.ConsumerProtocolAssignment;
import org.apache.kafka.common.message.ConsumerProtocolSubscription;
import org.apache.kafka.common.protocol.ApiMessage;
import org.apache.kafka.common.utils.ByteBufferOutputStream;
import org.apache.kafka.coordinator.group.generated.ConsumerGroupCurrentMemberAssignmentValue;
import org.apache.kafka.coordinator.group.generated.ShareGroupCurrentMemberAssignmentValue;
import org.apache.kafka.image.ClusterImage;
import org.apache.kafka.image.TopicImage;
import org.apache.kafka.image.TopicsImage;
import org.apache.kafka.metadata.BrokerRegistration;
import org.apache.kafka.server.common.ApiMessageAndVersion;

import com.google.re2j.Pattern;
import com.google.re2j.PatternSyntaxException;

import net.jpountz.xxhash.XXHash64;
import net.jpountz.xxhash.XXHashFactory;

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;

@@ -44,6 +53,7 @@
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Set;
import java.util.function.LongFunction;
import java.util.stream.Collectors;

public class Utils {
@@ -324,4 +334,112 @@ static void throwIfRegularExpressionIsInvalid(
                regex, ex.getDescription()));
        }
    }

    /**
     * The magic byte used to identify the version of the topic hash function.
     */
    static final byte TOPIC_HASH_MAGIC_BYTE = 0x00;
    static final XXHash64 LZ4_HASH_INSTANCE = XXHashFactory.fastestInstance().hash64();

    /**
     * Computes the hash of the topics in a group.
     * <p>
     * The computed hash value is stored as part of the metadata hash in the *GroupMetadataValue.
     * <p>
     * The hashing process involves the following steps:
     * 1. Sort the topic hashes by topic name.
     * 2. Convert each long hash value into a byte array.
     * 3. Combine the sorted byte arrays to produce a final hash for the group.
     *
     * @param topicHashes The map of topic hashes. Key is topic name and value is the topic hash.
     * @return The hash of the group.
     */
    static long computeGroupHash(Map<String, Long> topicHashes) {
        // Convert a long to a byte array. This is taken from guava LongHashCode#asBytes.
        // https://github.com/google/guava/blob/bdf2a9d05342fca852645278d474082905e09d94/guava/src/com/google/common/hash/HashCode.java#L187-L199
        LongFunction<byte[]> longToBytes = (long value) -> new byte[] {
            (byte) value,
            (byte) (value >> 8),
            (byte) (value >> 16),
            (byte) (value >> 24),
            (byte) (value >> 32),
            (byte) (value >> 40),
            (byte) (value >> 48),
            (byte) (value >> 56)
        };
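        // Example: longToBytes.apply(0x0807060504030201L) returns
        // {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08} (little-endian order).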

        // Combine the sorted topic hashes.
        byte[] resultBytes = new byte[8];

        // Sort entries by topic name.
        List<Map.Entry<String, Long>> sortedEntries = new ArrayList<>(topicHashes.entrySet());
        sortedEntries.sort(Map.Entry.comparingByKey());

        for (Map.Entry<String, Long> entry : sortedEntries) {
            Long value = entry.getValue();
            byte[] nextBytes = longToBytes.apply(value);

            // Combine ordered hashes. This is taken from guava Hashing#combineOrdered.
            // https://github.com/google/guava/blob/bdf2a9d05342fca852645278d474082905e09d94/guava/src/com/google/common/hash/Hashing.java#L689-L712
            for (int i = 0; i < nextBytes.length; i++) {
                resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
            }
        }

        return LZ4_HASH_INSTANCE.hash(resultBytes, 0, resultBytes.length, 0);
    }
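    // Example: the input map's iteration order does not affect the result, because
    // entries are sorted by topic name before combining:
    //   computeGroupHash(Map.of("foo", 456L, "bar", 123L))
    //       == computeGroupHash(Map.of("bar", 123L, "foo", 456L))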

    /**
     * Computes the hash of the topic id, name, number of partitions, and partition racks by XXHash64.
     * <p>
     * The computed hash value for the topic is used in conjunction with the {@link #computeGroupHash(Map)}
     * method and is stored as part of the metadata hash in the *GroupMetadataValue.
     * Note that if the hash algorithm is ever changed, the magic byte must be updated to reflect the
     * new hash version.
     * <p>
     * The hashing process involves the following steps:
     * 1. Write a magic byte to denote the version of the hash function.
     * 2. Write the hash code of the topic ID.
     * 3. Write the UTF-8 encoded topic name.
     * 4. Write the number of partitions associated with the topic.
     * 5. For each partition, write the partition ID and a sorted list of rack identifiers.
     *    - Rack identifiers are formatted as "<length1><value1><length2><value2>" to prevent issues with simple separators.
     *
     * @param topicImage   The topic image.
     * @param clusterImage The cluster image.
     * @return The hash of the topic.
     */
    static long computeTopicHash(TopicImage topicImage, ClusterImage clusterImage) throws IOException {
        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(TOPIC_HASH_MAGIC_BYTE); // magic byte
            dos.writeLong(topicImage.id().hashCode()); // topic ID
            dos.writeUTF(topicImage.name()); // topic name
            dos.writeInt(topicImage.partitions().size()); // number of partitions
            for (int i = 0; i < topicImage.partitions().size(); i++) {
                dos.writeInt(i); // partition id
                // Rack strings cannot be joined with a simple separator such as ",", because rack
                // identifiers may contain arbitrary characters, which makes a bare separator ambiguous:
                // for example, ["a,", "b"] and ["a", ",b"] would both serialize to "a,,b".
                // Prefixing each rack string with its length avoids this.
                List<String> racks = new ArrayList<>();
                for (int replicaId : topicImage.partitions().get(i).replicas) {
                    BrokerRegistration broker = clusterImage.broker(replicaId);
                    if (broker != null) {
                        Optional<String> rackOptional = broker.rack();
                        rackOptional.ifPresent(racks::add);
                    }
                }

                Collections.sort(racks);
                for (String rack : racks) {
                    // Format: "<length><value>". Note that writeUTF itself prepends a two-byte
                    // length, so the explicit writeInt is redundant but harmless.
                    dos.writeInt(rack.length());
                    dos.writeUTF(rack);
                }
            }
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            return LZ4_HASH_INSTANCE.hash(topicBytes, 0);
        }
    }
}

Review discussion on this method:
- There is also a streaming hash class - would that be a better option instead of creating the complete byte buffer?
- @FrankYang0529 Have you had a chance to look into the streaming hash class?
- Sorry, I just saw your comment here: #19523 (comment)
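For reference, a streaming variant along the lines the reviewer suggests could use lz4-java's StreamingXXHash64 to feed fields into the hash incrementally instead of materializing the whole buffer first. The sketch below is illustrative only: computeTopicHashStreaming and updateInt are hypothetical names, and the field encoding differs from DataOutputStream's (writeUTF, writeLong), so its output would not match computeTopicHash above.

    import java.nio.charset.StandardCharsets;

    import net.jpountz.xxhash.StreamingXXHash64;
    import net.jpountz.xxhash.XXHashFactory;

    // Hypothetical streaming variant; NOT byte-compatible with computeTopicHash above.
    static long computeTopicHashStreaming(TopicImage topicImage, ClusterImage clusterImage) {
        try (StreamingXXHash64 hash = XXHashFactory.fastestInstance().newStreamingHash64(0)) {
            hash.update(new byte[]{TOPIC_HASH_MAGIC_BYTE}, 0, 1); // magic byte
            updateInt(hash, topicImage.id().hashCode());          // topic ID hash code
            byte[] name = topicImage.name().getBytes(StandardCharsets.UTF_8);
            updateInt(hash, name.length);                         // length-prefix the topic name
            hash.update(name, 0, name.length);
            updateInt(hash, topicImage.partitions().size());      // number of partitions
            for (int i = 0; i < topicImage.partitions().size(); i++) {
                updateInt(hash, i);                               // partition id
                List<String> racks = new ArrayList<>();
                for (int replicaId : topicImage.partitions().get(i).replicas) {
                    BrokerRegistration broker = clusterImage.broker(replicaId);
                    if (broker != null) {
                        broker.rack().ifPresent(racks::add);
                    }
                }
                Collections.sort(racks);
                for (String rack : racks) {
                    byte[] rackBytes = rack.getBytes(StandardCharsets.UTF_8);
                    updateInt(hash, rackBytes.length);            // "<length><value>" framing
                    hash.update(rackBytes, 0, rackBytes.length);
                }
            }
            return hash.getValue();
        }
    }

    // Hypothetical helper: feed a big-endian int into the streaming hash.
    private static void updateInt(StreamingXXHash64 hash, int value) {
        hash.update(new byte[]{
            (byte) (value >>> 24), (byte) (value >>> 16), (byte) (value >>> 8), (byte) value
        }, 0, 4);
    }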
UtilsTest.java (new file)

@@ -0,0 +1,226 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.coordinator.group;

import org.apache.kafka.common.Uuid;
import org.apache.kafka.common.utils.ByteBufferOutputStream;
import org.apache.kafka.image.MetadataImage;

import net.jpountz.xxhash.XXHash64;
import net.jpountz.xxhash.XXHashFactory;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

public class UtilsTest {
    private static final Uuid FOO_TOPIC_ID = Uuid.randomUuid();
    private static final String FOO_TOPIC_NAME = "foo";
    private static final String BAR_TOPIC_NAME = "bar";
    private static final int FOO_NUM_PARTITIONS = 2;
    private static final MetadataImage FOO_METADATA_IMAGE = new MetadataImageBuilder()
        .addTopic(FOO_TOPIC_ID, FOO_TOPIC_NAME, FOO_NUM_PARTITIONS)
        .addRacks()
        .build();
    private static final XXHash64 LZ4_HASH_INSTANCE = XXHashFactory.fastestInstance().hash64();
Review discussion on this file:

Thanks for the suggestion. I benchmarked streaming XXH3 / streaming XXH64 / non-streaming XXH3 / non-streaming XXH64. Streaming XXH3 gets the best result, but it needs to include a new library (hash4j):
TopicHashBenchmark.java

package org.apache.kafka.jmh.metadata;

import org.apache.kafka.common.Uuid;
import org.apache.kafka.common.metadata.RegisterBrokerRecord;
import org.apache.kafka.common.utils.ByteBufferOutputStream;
import org.apache.kafka.image.ClusterDelta;
import org.apache.kafka.image.ClusterImage;
import org.apache.kafka.image.TopicsDelta;
import org.apache.kafka.image.TopicImage;
import org.apache.kafka.metadata.BrokerRegistration;

import com.dynatrace.hash4j.hashing.HashStream64;
import com.dynatrace.hash4j.hashing.Hashing;

import net.jpountz.xxhash.StreamingXXHash64;
import net.jpountz.xxhash.XXHashFactory;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

import static org.apache.kafka.jmh.metadata.TopicsImageSnapshotLoadBenchmark.getInitialTopicsDelta;
import static org.apache.kafka.jmh.metadata.TopicsImageSnapshotLoadBenchmark.getNumBrokers;

@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class TopicHashBenchmark {
    @Param({"10", "50", "100"})
    private int partitionsPerTopic;

    @Param({"3"})
    private int replicationFactor;

    @Param({"10"})
    private int numReplicasPerBroker;

    private TopicImage topicImage;
    private ClusterImage clusterImage;

    @Setup(Level.Trial)
    public void setup() throws IOException {
        TopicsDelta topicsDelta = getInitialTopicsDelta(1, partitionsPerTopic, replicationFactor, numReplicasPerBroker);
        int numBrokers = getNumBrokers(1, partitionsPerTopic, replicationFactor, numReplicasPerBroker);
        ClusterDelta clusterDelta = new ClusterDelta(ClusterImage.EMPTY);
        for (int i = 0; i < numBrokers; i++) {
            clusterDelta.replay(new RegisterBrokerRecord()
                .setBrokerId(i)
                .setRack(Uuid.randomUuid().toString())
            );
        }
        topicImage = topicsDelta.apply().topicsById().values().stream().findFirst().get();
        clusterImage = clusterDelta.apply();
    }
    @Benchmark
    public void testLz4StreamingXXHash64() {
        try (StreamingXXHash64 hash = XXHashFactory.fastestInstance().newStreamingHash64(0)) {
            hash.update(new byte[]{(byte) 0}, 0, 1); // magic byte
            // topic id (intToBytes returns a 4-byte array, so the length is 4)
            hash.update(intToBytes(topicImage.id().hashCode()), 0, 4);
            // topic name
            byte[] topicNameBytes = topicImage.name().getBytes();
            hash.update(topicNameBytes, 0, topicNameBytes.length);
            // number of partitions
            hash.update(intToBytes(topicImage.partitions().size()), 0, 4);
            for (int i = 0; i < topicImage.partitions().size(); i++) {
                // partition id
                hash.update(intToBytes(i), 0, 4);
                // sorted racks
                List<String> racks = new ArrayList<String>();
                for (int replicaId : topicImage.partitions().get(i).replicas) {
                    BrokerRegistration broker = clusterImage.broker(replicaId);
                    if (broker != null) {
                        Optional<String> rackOptional = broker.rack();
                        rackOptional.ifPresent(racks::add);
                    }
                }
                Collections.sort(racks);
                for (String rack : racks) {
                    // Format: "<length><value>"
                    byte[] rackBytes = rack.getBytes();
                    hash.update(intToBytes(rack.length()), 0, 4);
                    hash.update(rackBytes, 0, rackBytes.length);
                }
            }
            hash.getValue();
        }
    }
    @Benchmark
    public void testLz4XXHash64() throws IOException {
        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(0); // magic byte
            dos.writeLong(topicImage.id().hashCode()); // topic ID
            dos.writeUTF(topicImage.name()); // topic name
            dos.writeInt(topicImage.partitions().size()); // number of partitions
            for (int i = 0; i < topicImage.partitions().size(); i++) {
                dos.writeInt(i); // partition id
                // The rack strings cannot be joined with a simple separator like ",", because there is
                // no restriction on rack characters, which makes a bare separator ambiguous.
                // Add a length before each rack string to avoid the edge case.
                List<String> racks = new ArrayList<>();
                for (int replicaId : topicImage.partitions().get(i).replicas) {
                    BrokerRegistration broker = clusterImage.broker(replicaId);
                    if (broker != null) {
                        Optional<String> rackOptional = broker.rack();
                        rackOptional.ifPresent(racks::add);
                    }
                }
                Collections.sort(racks);
                for (String rack : racks) {
                    // Format: "<length><value>"
                    dos.writeInt(rack.length());
                    dos.writeUTF(rack);
                }
            }
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            XXHashFactory.fastestInstance().hash64().hash(topicBytes, 0);
        }
    }
    @Benchmark
    public void testDynatraceStreamingXXH3() {
        HashStream64 hash = Hashing.xxh3_64().hashStream();
        hash = hash.putByte((byte) 0)
            .putLong(topicImage.id().hashCode())
            .putString(topicImage.name())
            .putInt(topicImage.partitions().size());
        for (int i = 0; i < topicImage.partitions().size(); i++) {
            // partition id
            hash = hash.putInt(i);
            // sorted racks
            List<String> racks = new ArrayList<String>();
            for (int replicaId : topicImage.partitions().get(i).replicas) {
                BrokerRegistration broker = clusterImage.broker(replicaId);
                if (broker != null) {
                    Optional<String> rackOptional = broker.rack();
                    rackOptional.ifPresent(racks::add);
                }
            }
            Collections.sort(racks);
            for (String rack : racks) {
                // Format: "<length><value>"
                hash.putInt(rack.length());
                hash.putString(rack);
            }
        }
        hash.getAsLong();
    }
    @Benchmark
    public void testDynatraceXXH3() throws IOException {
        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(0); // magic byte
            dos.writeLong(topicImage.id().hashCode()); // topic ID
            dos.writeUTF(topicImage.name()); // topic name
            dos.writeInt(topicImage.partitions().size()); // number of partitions
            for (int i = 0; i < topicImage.partitions().size(); i++) {
                dos.writeInt(i); // partition id
                // The rack strings cannot be joined with a simple separator like ",", because there is
                // no restriction on rack characters, which makes a bare separator ambiguous.
                // Add a length before each rack string to avoid the edge case.
                List<String> racks = new ArrayList<>();
                for (int replicaId : topicImage.partitions().get(i).replicas) {
                    BrokerRegistration broker = clusterImage.broker(replicaId);
                    if (broker != null) {
                        Optional<String> rackOptional = broker.rack();
                        rackOptional.ifPresent(racks::add);
                    }
                }
                Collections.sort(racks);
                for (String rack : racks) {
                    // Format: "<length><value>"
                    dos.writeInt(rack.length());
                    dos.writeUTF(rack);
                }
            }
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            // Hash only the bytes actually written, not the entire 512-byte backing array.
            Hashing.xxh3_64().hashBytesToLong(topicBytes.array(), 0, topicBytes.remaining());
        }
    }
    private byte[] intToBytes(int value) {
        return new byte[] {
            (byte) (value >>> 24),
            (byte) (value >>> 16),
            (byte) (value >>> 8),
            (byte) value
        };
    }
}

Follow-up discussion:
- Using streaming XXH3 seems pretty interesting to me given the results. Is …
- From https://xxhash.com/, there are three Java libraries. Only zero-allocation-hashing and hash4j provide XXH3. However, only hash4j has a streaming hash class.
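Given those results, adopting streaming XXH3 in Utils would look roughly like the sketch below, built on hash4j's HashStream64 API (the same calls the benchmark above uses). computeTopicHashV2 is a hypothetical name, this is not the change made in this PR, and switching algorithms would also require bumping the magic byte, as discussed at the end of this page.

    import com.dynatrace.hash4j.hashing.HashStream64;
    import com.dynatrace.hash4j.hashing.Hashing;

    // Hypothetical XXH3-based variant; NOT byte-compatible with the XXH64 version in this PR.
    static long computeTopicHashV2(TopicImage topicImage, ClusterImage clusterImage) {
        HashStream64 hashStream = Hashing.xxh3_64().hashStream()
            .putByte((byte) 1)                       // bumped magic byte for the new algorithm
            .putLong(topicImage.id().hashCode())     // topic ID hash code
            .putString(topicImage.name())            // topic name
            .putInt(topicImage.partitions().size()); // number of partitions
        for (int i = 0; i < topicImage.partitions().size(); i++) {
            hashStream.putInt(i); // partition id
            List<String> racks = new ArrayList<>();
            for (int replicaId : topicImage.partitions().get(i).replicas) {
                BrokerRegistration broker = clusterImage.broker(replicaId);
                if (broker != null) {
                    broker.rack().ifPresent(racks::add);
                }
            }
            Collections.sort(racks);
            for (String rack : racks) {
                // putString incorporates the string length, so no explicit
                // "<length><value>" framing is needed to stay unambiguous.
                hashStream.putString(rack);
            }
        }
        return hashStream.getAsLong();
    }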
UtilsTest.java, continued:

    @Test
    void testComputeTopicHash() throws IOException {
        long result = Utils.computeTopicHash(FOO_METADATA_IMAGE.topics().getTopic(FOO_TOPIC_ID), FOO_METADATA_IMAGE.cluster());

        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(0); // magic byte
            dos.writeLong(FOO_TOPIC_ID.hashCode()); // topic ID
            dos.writeUTF(FOO_TOPIC_NAME); // topic name
            dos.writeInt(FOO_NUM_PARTITIONS); // number of partitions
            dos.writeInt(0); // partition 0
            dos.writeInt(5); // length of rack0
            dos.writeUTF("rack0"); // The first rack in partition 0
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The second rack in partition 0
            dos.writeInt(1); // partition 1
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The first rack in partition 1
            dos.writeInt(5); // length of rack2
            dos.writeUTF("rack2"); // The second rack in partition 1
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            assertEquals(LZ4_HASH_INSTANCE.hash(topicBytes, 0), result);
        }
    }
    @Test
    void testComputeTopicHashWithDifferentMagicByte() throws IOException {
        long result = Utils.computeTopicHash(FOO_METADATA_IMAGE.topics().getTopic(FOO_TOPIC_ID), FOO_METADATA_IMAGE.cluster());

        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(1); // different magic byte
            dos.writeLong(FOO_TOPIC_ID.hashCode()); // topic ID
            dos.writeUTF(FOO_TOPIC_NAME); // topic name
            dos.writeInt(FOO_NUM_PARTITIONS); // number of partitions
            dos.writeInt(0); // partition 0
            dos.writeInt(5); // length of rack0
            dos.writeUTF("rack0"); // The first rack in partition 0
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The second rack in partition 0
            dos.writeInt(1); // partition 1
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The first rack in partition 1
            dos.writeInt(5); // length of rack2
            dos.writeUTF("rack2"); // The second rack in partition 1
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            assertNotEquals(LZ4_HASH_INSTANCE.hash(topicBytes, 0), result);
        }
    }
    @Test
    void testComputeTopicHashWithDifferentPartitionOrder() throws IOException {
        long result = Utils.computeTopicHash(FOO_METADATA_IMAGE.topics().getTopic(FOO_TOPIC_ID), FOO_METADATA_IMAGE.cluster());

        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(0); // magic byte
            dos.writeLong(FOO_TOPIC_ID.hashCode()); // topic ID
            dos.writeUTF(FOO_TOPIC_NAME); // topic name
            dos.writeInt(FOO_NUM_PARTITIONS); // number of partitions
            // different partition order
            dos.writeInt(1); // partition 1
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The first rack in partition 1
            dos.writeInt(5); // length of rack2
            dos.writeUTF("rack2"); // The second rack in partition 1
            dos.writeInt(0); // partition 0
            dos.writeInt(5); // length of rack0
            dos.writeUTF("rack0"); // The first rack in partition 0
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The second rack in partition 0
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            assertNotEquals(LZ4_HASH_INSTANCE.hash(topicBytes, 0), result);
        }
    }
    @Test
    void testComputeTopicHashWithDifferentRackOrder() throws IOException {
        long result = Utils.computeTopicHash(FOO_METADATA_IMAGE.topics().getTopic(FOO_TOPIC_ID), FOO_METADATA_IMAGE.cluster());

        try (ByteBufferOutputStream bbos = new ByteBufferOutputStream(512);
             DataOutputStream dos = new DataOutputStream(bbos)) {
            dos.writeByte(0); // magic byte
            dos.writeLong(FOO_TOPIC_ID.hashCode()); // topic ID
            dos.writeUTF(FOO_TOPIC_NAME); // topic name
            dos.writeInt(FOO_NUM_PARTITIONS); // number of partitions
            dos.writeInt(0); // partition 0
            // different rack order of partition 0
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The second rack in partition 0
            dos.writeInt(5); // length of rack0
            dos.writeUTF("rack0"); // The first rack in partition 0
            dos.writeInt(1); // partition 1
            dos.writeInt(5); // length of rack1
            dos.writeUTF("rack1"); // The first rack in partition 1
            dos.writeInt(5); // length of rack2
            dos.writeUTF("rack2"); // The second rack in partition 1
            dos.flush();
            ByteBuffer topicBytes = bbos.buffer().flip();
            assertNotEquals(LZ4_HASH_INSTANCE.hash(topicBytes, 0), result);
        }
    }
    @ParameterizedTest
    @MethodSource("differentFieldGenerator")
    void testComputeTopicHashWithDifferentField(MetadataImage differentImage, Uuid topicId) throws IOException {
        long result = Utils.computeTopicHash(FOO_METADATA_IMAGE.topics().getTopic(FOO_TOPIC_ID), FOO_METADATA_IMAGE.cluster());

        assertNotEquals(
            Utils.computeTopicHash(
                differentImage.topics().getTopic(topicId),
                differentImage.cluster()
            ),
            result
        );
    }

    private static Stream<Arguments> differentFieldGenerator() {
        Uuid differentTopicId = Uuid.randomUuid();
        return Stream.of(
            Arguments.of(new MetadataImageBuilder() // different topic id
                .addTopic(differentTopicId, FOO_TOPIC_NAME, FOO_NUM_PARTITIONS)
                .addRacks()
                .build(),
                differentTopicId
            ),
            Arguments.of(new MetadataImageBuilder() // different topic name
                .addTopic(FOO_TOPIC_ID, BAR_TOPIC_NAME, FOO_NUM_PARTITIONS)
                .addRacks()
                .build(),
                FOO_TOPIC_ID
            ),
            Arguments.of(new MetadataImageBuilder() // different partitions
                .addTopic(FOO_TOPIC_ID, FOO_TOPIC_NAME, 1)
                .addRacks()
                .build(),
                FOO_TOPIC_ID
            ),
            Arguments.of(new MetadataImageBuilder() // different racks
                .addTopic(FOO_TOPIC_ID, FOO_TOPIC_NAME, FOO_NUM_PARTITIONS)
                .build(),
                FOO_TOPIC_ID
            )
        );
    }

    @Test
    void testComputeGroupHashWithDifferentOrder() {
        Map<String, Long> ascendTopicHashes = new LinkedHashMap<>();
        ascendTopicHashes.put(BAR_TOPIC_NAME, 123L);
        ascendTopicHashes.put(FOO_TOPIC_NAME, 456L);

        Map<String, Long> descendTopicHashes = new LinkedHashMap<>();
        descendTopicHashes.put(FOO_TOPIC_NAME, 456L);
        descendTopicHashes.put(BAR_TOPIC_NAME, 123L);

        assertEquals(Utils.computeGroupHash(ascendTopicHashes), Utils.computeGroupHash(descendTopicHashes));
    }

    @Test
    void testComputeGroupHashWithSameKeyButDifferentValue() {
        Map<String, Long> map1 = Map.of(
            BAR_TOPIC_NAME, 123L,
            FOO_TOPIC_NAME, 456L
        );

        Map<String, Long> map2 = Map.of(
            BAR_TOPIC_NAME, 456L,
            FOO_TOPIC_NAME, 123L
        );

        assertNotEquals(Utils.computeGroupHash(map1), Utils.computeGroupHash(map2));
    }
}
Review discussion:
- Please add documentation to remind developers that the hash is stored as part of the state. Changing the implementation of the hashing function may break compatibility with existing states.
- If the hashing function is ever changed, is there a version field that should be updated?
- Yes, there is a magic byte as the version.
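Picking up the first point, a documentation note along these lines could sit next to TOPIC_HASH_MAGIC_BYTE (suggested wording only, not text from the PR):

    /**
     * NOTE: topic and group hashes computed here are persisted as part of the group
     * metadata state. Changing the hash algorithm or the serialized field layout
     * invalidates previously stored hashes, so any such change must also bump
     * TOPIC_HASH_MAGIC_BYTE so that old and new values are never treated as comparable.
     */
    static final byte TOPIC_HASH_MAGIC_BYTE = 0x00;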