diff --git a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java
index f31f0e397a..09c82b2fa2 100644
--- a/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java
+++ b/helix-core/src/main/java/org/apache/helix/ClusterMessagingService.java
@@ -37,6 +37,13 @@
public interface ClusterMessagingService {
/**
* Send message matching the specifications mentioned in recipientCriteria.
+ *
+ * <p><b>PERFORMANCE WARNING:</b> When recipientCriteria uses
+ * {@link Criteria.DataSource#EXTERNALVIEW} with wildcard or unspecified resource names, this scans
+ * ALL ExternalView znodes in the cluster, regardless of other criteria like instanceName. At scale,
+ * this causes severe performance degradation. Use {@link Criteria.DataSource#LIVEINSTANCES} when you
+ * don't need resource/partition filtering, or specify exact resource names when using EXTERNALVIEW.
+ *
* @param recipientCriteria criteria to be met, defined as {@link Criteria}
* @See Criteria
* @param message
@@ -54,6 +61,7 @@ public interface ClusterMessagingService {
* This method will return after sending the messages.
* This is useful when message need to be sent and current thread need not
* wait for response since processing will be done in another thread.
+ *
* @see #send(Criteria, Message)
* @param recipientCriteria
* @param message
@@ -85,7 +93,8 @@ int send(Criteria recipientCriteria, Message message, AsyncCallback callbackOnRe
* for response.
* The current thread can use callbackOnReply instance to store application
* specific data.
- * @see #send(Criteria, Message, AsyncCallback, int)
+ *
+ * @see #send(Criteria, Message)
* @param recipientCriteria
* @param message
* @param callbackOnReply
@@ -96,7 +105,7 @@ int sendAndWait(Criteria recipientCriteria, Message message, AsyncCallback callb
int timeOut);
/**
- * @see #send(Criteria, Message, AsyncCallback, int, int)
+ * @see #send(Criteria, Message)
* @param receipientCriteria
* @param message
* @param callbackOnReply
@@ -143,6 +152,8 @@ int sendAndWait(Criteria receipientCriteria, Message message, AsyncCallback call
/**
* This will generate all messages to be sent given the recipientCriteria and MessageTemplate,
* the messages are not sent.
+ *
+ * @see #send(Criteria, Message)
* @param recipientCriteria criteria to be met, defined as {@link Criteria}
* @param messageTemplate the Message on which to base the messages to send
* @return messages to be sent, grouped by the type of instance to send the message to
diff --git a/helix-core/src/main/java/org/apache/helix/Criteria.java b/helix-core/src/main/java/org/apache/helix/Criteria.java
index d01228db83..1ddb429e1b 100644
--- a/helix-core/src/main/java/org/apache/helix/Criteria.java
+++ b/helix-core/src/main/java/org/apache/helix/Criteria.java
@@ -20,9 +20,43 @@
*/
/**
- * Describes various properties that operations involving {@link Message} delivery will follow.
+ * Specifies recipient criteria for message delivery in a Helix cluster.
+ *
+ * <p><b>PERFORMANCE WARNING:</b> Using {@link DataSource#EXTERNALVIEW} with wildcard or unspecified
+ * resource names causes Helix to scan ALL ExternalView znodes in the cluster, regardless of other
+ * criteria fields. At scale, this causes severe performance degradation.
+ *
+ * <p><b>Example - Efficient Pattern:</b>
+ * <pre>
+ * // GOOD: Target specific live instance
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("host_1234");
+ * criteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+ * criteria.setDataSource(DataSource.LIVEINSTANCES); // Fast
+ * criteria.setSessionSpecific(true);
+ *
+ * // BAD: Wildcard resource with ExternalView
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("host_1234");
+ * criteria.setDataSource(DataSource.EXTERNALVIEW);
+ * criteria.setResource("%"); // Scans ALL ExternalViews!
+ * </pre>
+ *
+ * <p><b>DataSource Selection:</b>
+ * <ul>
+ * <li><b>LIVEINSTANCES:</b> Use when targeting live instances without resource/partition filtering. Fastest.</li>
+ * <li><b>EXTERNALVIEW:</b> Use when filtering by resource, partition, or replica state.
+ * ALWAYS specify exact resource names.</li>
+ * <li><b>INSTANCES:</b> Use for targeting all configured instances based on instance config.</li>
+ * <li><b>IDEALSTATES:</b> Use for targeting based on ideal state. Less common.</li>
+ * </ul>
+ *
+ * @see ClusterMessagingService#send(Criteria, org.apache.helix.model.Message)
*/
public class Criteria {
+ /**
+ * Source of cluster state data for resolving message recipients.
+ */
public enum DataSource {
IDEALSTATES,
EXTERNALVIEW,
@@ -80,8 +114,12 @@ public DataSource getDataSource() {
}
/**
- * Set the current source of truth
- * @param source ideal state or external view
+ * Set the current source of truth for resolving message recipients.
+ *
+ * <p>Prefer {@link DataSource#LIVEINSTANCES} when not filtering by resource/partition.
+ * If using {@link DataSource#EXTERNALVIEW}, specify exact resource names to avoid full scans.
+ *
+ * @param source ideal state, external view, live instances, or instances
*/
public void setDataSource(DataSource source) {
_dataSource = source;
@@ -161,8 +199,12 @@ public String getResource() {
}
/**
- * Set the destination resource name
- * @param resourceName the resource name or % for all resources
+ * Set the destination resource name.
+ *
+ * <p>Only meaningful for {@link DataSource#EXTERNALVIEW} or {@link DataSource#IDEALSTATES}.
+ * Using wildcard "%" with EXTERNALVIEW reads ALL ExternalView znodes - use exact names instead.
+ *
+ * @param resourceName the exact resource name, or "%" for all resources
*/
public void setResource(String resourceName) {
this.resourceName = resourceName;
diff --git a/helix-core/src/main/java/org/apache/helix/HelixManager.java b/helix-core/src/main/java/org/apache/helix/HelixManager.java
index c1d2ad18c5..cf3378314c 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixManager.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixManager.java
@@ -409,6 +409,8 @@ void addExternalViewChangeListener(org.apache.helix.ExternalViewChangeListener l
/**
* Messaging service which can be used to send cluster wide messages.
+ * See {@link ClusterMessagingService#send(Criteria, org.apache.helix.model.Message)} for usage.
+ *
* @return messaging service
*/
ClusterMessagingService getMessagingService();
diff --git a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java
index f0e9ef58ff..5ef64f4c8f 100644
--- a/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java
+++ b/helix-core/src/main/java/org/apache/helix/messaging/CriteriaEvaluator.java
@@ -39,12 +39,51 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+/**
+ * Evaluates {@link Criteria} against persisted Helix data to determine message recipients.
+ *
+ * <p><b>PERFORMANCE WARNING:</b> When using {@link DataSource#EXTERNALVIEW}, this evaluator
+ * will scan all ExternalView znodes in the cluster if the resource name is unspecified or uses wildcards
+ * (e.g., "%" or "*"). This scanning happens even when targeting specific instances, and is
+ * NOT automatically optimized based on other criteria fields (like instanceName).
+ *
+ * <p>At high ExternalView cardinality, this can cause severe performance degradation.
+ *
+ * <p><b>Safer Patterns:</b>
+ * <ul>
+ * <li><b>Use {@link DataSource#LIVEINSTANCES}:</b> When you only need to target live instances
+ * and do not require resource/partition-level filtering. This reads only the LIVEINSTANCES
+ * znodes, a set that is typically much smaller and faster to read.</li>
+ * <li><b>Specify exact resource names:</b> If ExternalView is required, provide specific resource
+ * names in {@link Criteria#setResource(String)} instead of wildcards to limit the scan scope.</li>
+ * </ul>
+ *
+ * <p><b>Example - Targeting a specific instance:</b>
+ * <pre>
+ * // BAD: Scans all ExternalViews even though instance is specified
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("instance123");
+ * criteria.setDataSource(DataSource.EXTERNALVIEW);
+ * criteria.setResource("%"); // wildcard triggers full scan
+ *
+ * // GOOD: Uses LIVEINSTANCES, avoids ExternalView scan
+ * Criteria criteria = new Criteria();
+ * criteria.setInstanceName("instance123");
+ * criteria.setDataSource(DataSource.LIVEINSTANCES);
+ * </pre>
+ */
public class CriteriaEvaluator {
private static Logger logger = LoggerFactory.getLogger(CriteriaEvaluator.class);
public static final String MATCH_ALL_SYM = "%";
/**
* Examine persisted data to match wildcards in {@link Criteria}
+ *
+ * <p><b>PERFORMANCE WARNING:</b> Using {@link DataSource#EXTERNALVIEW} with wildcard resource
+ * names (or unspecified resource) will scan ALL ExternalView znodes, even when targeting specific
+ * instances. At high cardinality, this can cause severe performance degradation. Prefer
+ * {@link DataSource#LIVEINSTANCES} when resource/partition filtering is not needed.
+ *
* @param recipientCriteria Criteria specifying the message destinations
* @param manager connection to the persisted data
* @return map of evaluated criteria
@@ -56,6 +95,12 @@ public List