opensearch-project · ruai0511 · Sep 25, 2025 · jainankitk · Sep 26, 2025 · kaushalmahi12
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add a dynamic setting to change skip_cache_factor and min_frequency for querycache ([#18351](https://github.com/opensearch-project/OpenSearch/issues/18351))
 - Add overload constructor for Translog to accept Channel Factory as a parameter ([#18918](https://github.com/opensearch-project/OpenSearch/pull/18918))
 - Add subdirectory-aware store module with recovery support ([#19132](https://github.com/opensearch-project/OpenSearch/pull/19132))
+- [Rule-based Auto-tagging] Add autotagging label resolving logic for multiple attributes ([#19424](https://github.com/opensearch-project/OpenSearch/pull/19424))
 - Field collapsing supports search_after ([#19261](https://github.com/opensearch-project/OpenSearch/pull/19261))
 - Add a dynamic cluster setting to control the enablement of the merged segment warmer ([#18929](https://github.com/opensearch-project/OpenSearch/pull/18929))
 - Publish transport-grpc-spi exposing QueryBuilderProtoConverter and QueryBuilderProtoConverterRegistry ([#18949](https://github.com/opensearch-project/OpenSearch/pull/18949))

@@ -56,6 +56,7 @@ commonscodec      = "1.18.0"
 commonslang       = "3.18.0"
 commonscompress   = "1.28.0"
 commonsio         = "2.16.0"
+commonscollections4 = "4.5.0"
 # plugin dependencies
 aws               = "2.32.29"
 awscrt            = "0.35.0"

@@ -12,7 +12,7 @@ apply plugin: 'opensearch.publish'
 description = 'OpenSearch Rule framework common constructs which spi and module shares'
 
 dependencies {
-  api 'org.apache.commons:commons-collections4:4.4'
+  api "org.apache.commons:commons-collections4:${versions.commonscollections4}"
   implementation project(":libs:opensearch-common")
   compileOnly project(":server")
 

@@ -15,6 +15,22 @@
  * @param <V>
  */
 public interface AttributeExtractor<V> {
+
+    /**
+     * Defines the combination style used when a request contains multiple values
+     * for an attribute.
+     */
+    enum LogicalOperator {
+        /**
+         * Logical AND
+         */
+        AND,
+        /**
+         * Logical OR
+         */
+        OR
+    }
+
     /**
      * This method returns the Attribute which it is responsible for extracting
      * @return attribute
@@ -26,4 +42,13 @@ public interface AttributeExtractor<V> {
      * @return attribute value
      */
     Iterable<V> extract();
+
+    /**
+     * Returns the logical operator used when a request contains multiple values
+     * for an attribute.
+     * For example, if the request targets both index A and B, then a rule must
+     * have both index A and B as attributes, requiring an AND operator.
+     * @return the logical operator (e.g., AND, OR)
+     */
+    LogicalOperator getLogicalOperator();
 }
@@ -11,9 +11,11 @@
 import org.opensearch.rule.attribute_extractor.AttributeExtractor;
 import org.opensearch.rule.autotagging.Attribute;
 import org.opensearch.rule.autotagging.Rule;
+import org.opensearch.rule.feature_value_resolver.FeatureValueResolver;
 import org.opensearch.rule.storage.AttributeValueStore;
 import org.opensearch.rule.storage.AttributeValueStoreFactory;
 
+import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -32,13 +34,23 @@ public class InMemoryRuleProcessingService {
      */
     public static final String WILDCARD = "*";
     private final AttributeValueStoreFactory attributeValueStoreFactory;
+    /**
+     * Map of prioritized attributes
+     */
+    private final Map<Attribute, Integer> prioritizedAttributes;
 
     /**
-     *  Constructor
-     * @param attributeValueStoreFactory
+     * Constructs an InMemoryRuleProcessingService with the given
+     * attribute value store factory and a prioritized list of attributes.
+     * @param attributeValueStoreFactory Factory to create attribute value stores.
+     * @param prioritizedAttributes      Map of prioritized attributes
      */
-    public InMemoryRuleProcessingService(AttributeValueStoreFactory attributeValueStoreFactory) {
+    public InMemoryRuleProcessingService(
+        AttributeValueStoreFactory attributeValueStoreFactory,
+        Map<Attribute, Integer> prioritizedAttributes
+    ) {
         this.attributeValueStoreFactory = attributeValueStoreFactory;
+        this.prioritizedAttributes = prioritizedAttributes;
     }
 
     /**
@@ -58,8 +70,14 @@ public void remove(final Rule rule) {
     }
 
     private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Rule> ruleOperation) {
-        for (Map.Entry<Attribute, Set<String>> attributeEntry : rule.getAttributeMap().entrySet()) {
-            ruleOperation.accept(attributeEntry, rule);
+        for (Attribute attribute : rule.getFeatureType().getAllowedAttributesRegistry().values()) {
+            Set<String> attributeValues;
+            if (rule.getAttributeMap().containsKey(attribute)) {
+                attributeValues = rule.getAttributeMap().get(attribute);
+            } else {
+                attributeValues = Set.of("");
+            }
+            ruleOperation.accept(Map.entry(attribute, attributeValues), rule);
         }
     }
 
@@ -78,37 +96,14 @@ private void addOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule
     }
 
     /**
-     * Evaluates the label for the current request. It finds the matches for each attribute value and then it is an
-     * intersection of all the matches
-     * @param attributeExtractors list of extractors which are used to get the attribute values to find the
-     *                           matching rule
-     * @return a label if there is unique label otherwise empty
+     * Determines the final feature value for the given request
+     * @param attributeExtractors list of attribute extractors
      */
     public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attributeExtractors) {
-        assert attributeValueStoreFactory != null;
-        Optional<String> result = Optional.empty();
-        for (AttributeExtractor<String> attributeExtractor : attributeExtractors) {
-            AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(
-                attributeExtractor.getAttribute()
-            );
-            for (String value : attributeExtractor.extract()) {
-                List<Set<String>> candidateMatches = valueStore.getAll(value);
-
-                if (candidateMatches == null || candidateMatches.isEmpty()) {
-                    return Optional.empty();
-                }
-
-                Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
-                if (result.isEmpty()) {
-                    result = possibleMatch;
-                } else {
-                    boolean isThePossibleMatchEqualResult = possibleMatch.get().equals(result.get());
-                    if (!isThePossibleMatchEqualResult) {
-                        return Optional.empty();
-                    }
-                }
-            }
-        }
-        return result;
+        attributeExtractors.sort(
+            Comparator.comparingInt(extractor -> prioritizedAttributes.getOrDefault(extractor.getAttribute(), Integer.MAX_VALUE))
+        );
+        FeatureValueResolver featureValueResolver = new FeatureValueResolver(attributeValueStoreFactory);
+        return featureValueResolver.resolve(attributeExtractors).resolveLabel();
     }
 }
@@ -0,0 +1,134 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rule.feature_value_resolver;
+
+import org.opensearch.rule.attribute_extractor.AttributeExtractor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Represents candidate feature values for an attribute
+ */
+public class CandidateFeatureValues {
+
+    /**
+     * A list of sets of candidate feature values collected for an attribute
+     * The list is ordered from the most specific match to less specific ones. For example:
+     * featureValues = [ {"a", "b"}, {"c"} ]
+     * Here, {"a", "b"} comes first because these feature values comes from rules with a more specific match
+     * e.g. A rule with "username|123" is a more specific match than "username|1" when querying "username|1234".
+     */
+    private final List<Set<String>> featureValuesBySpecificity;
+
+    /**
+     * A flattened set of all candidate values collected across all specificity levels.
+     * This set combines all values in 'featureValues' into a single collection for easy access
+     * and intersection computations.
+     */
+    private final Set<String> flattenedValues = new HashSet<>();
+
+    /**
+     * Maps each feature value to the index of its first occurrence set in 'featureValues'.
+     * This helps in tie-breaking: values appearing earlier in the list (i.e., more specific matches)
+     * are considered better matches when resolving the final label.
+     */
+    private final Map<String, Integer> firstOccurrenceIndex = new HashMap<>();
+
+    /**
+     * Constructs CandidateFeatureValues initialized with given list of value sets.
+     * @param initialValues List of sets of candidate values.
+     */
+    public CandidateFeatureValues(List<Set<String>> initialValues) {
+        this.featureValuesBySpecificity = new ArrayList<>(initialValues);
+        for (int i = 0; i < featureValuesBySpecificity.size(); i++) {
+            for (String val : featureValuesBySpecificity.get(i)) {
+                flattenedValues.add(val);
+                firstOccurrenceIndex.putIfAbsent(val, i);
+            }
+        }
+    }
+
+    /**
+     * flattenedValues getter
+     */
+    public Set<String> getFlattenedValues() {
+        return flattenedValues;
+    }
+
+    /**
+     * firstOccurrenceIndex getter
+     * @param value
+     */
+    public int getFirstOccurrenceIndex(String value) {
+        return firstOccurrenceIndex.getOrDefault(value, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Merges this CandidateFeatureValues with another based on the specified logical operator
+     * @param other            Other CandidateFeatureValues to merge with.
+     * @param logicalOperator Logical operator (AND / OR) for merging.
+     */
+    public CandidateFeatureValues merge(CandidateFeatureValues other, AttributeExtractor.LogicalOperator logicalOperator) {
+        return switch (logicalOperator) {
+            case AND -> mergeAnd(other);
+            case OR -> mergeOr(other);
+        };
+    }
+
+    private CandidateFeatureValues mergeOr(CandidateFeatureValues other) {
+        return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, null);
+    }
+
+    private CandidateFeatureValues mergeAnd(CandidateFeatureValues other) {
+        Set<String> elementsInThis = this.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet());
+        Set<String> elementsInOther = other.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet());
+
+        Set<String> common = new HashSet<>(elementsInThis);
+        common.retainAll(elementsInOther);
+
+        return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, common);
+    }
+
+    private CandidateFeatureValues mergeByIndex(List<Set<String>> list1, List<Set<String>> list2, Set<String> filterElements) {
+        List<Set<String>> result = new ArrayList<>();
+        int max = Math.max(list1.size(), list2.size());
+
+        for (int i = 0; i < max; i++) {
+            Set<String> merged = new HashSet<>();
+            if (i < list1.size()) {
+                merged.addAll(list1.get(i));
+            }
+            if (i < list2.size()) {
+                merged.addAll(list2.get(i));
+            }
+            if (filterElements != null) {
+                merged.retainAll(filterElements);
+            }
+            if (!merged.isEmpty()) {
+                result.add(merged);
+            }
+        }
+        return new CandidateFeatureValues(result);
+    }
+
+    @Override
+    public String toString() {
+        return "(" + "values=" + featureValuesBySpecificity + ')';
+    }
+
+    List<Set<String>> getFeatureValuesBySpecificity() {
+        return featureValuesBySpecificity;
+    }
+}
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rule.feature_value_resolver;
+
+import org.opensearch.rule.attribute_extractor.AttributeExtractor;
+import org.opensearch.rule.storage.AttributeValueStore;
+
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Collects candidate feature values for a specified subfield of a given attribute extractor.
+ * For example, the "principal" attribute may contain subfields such as "username" and "role":
+ * principal: {
+ *   "username": ["alice", "bob"],
+ *   "role": ["admin"]
+ * }
+ * If the attribute does not define any subfields, then the subfield name is represented
+ * by an empty string ""
+ */
+public class FeatureValueCollector {
+
+    private final AttributeValueStore<String, String> attributeValueStore;
+    private final AttributeExtractor<String> attributeExtractor;
+    private final String subfield;
+
+    /**
+     * Constructs a FeatureValueCollector with the given store, extractor, and subfield.
+     * @param attributeValueStore The store to retrieve candidate feature values from.
+     * @param attributeExtractor  The extractor to extract attribute values.
+     * @param subfield            The subfield attribute
+     */
+    public FeatureValueCollector(
+        AttributeValueStore<String, String> attributeValueStore,
+        AttributeExtractor<String> attributeExtractor,
+        String subfield
+    ) {
+        this.attributeValueStore = attributeValueStore;
+        this.attributeExtractor = attributeExtractor;
+        this.subfield = subfield;
+    }
+
+    /**
+     * Collects feature values for the subfield from the attribute extractor.
+     */
+    public CandidateFeatureValues collect() {
+        CandidateFeatureValues result = null;
+        for (String value : attributeExtractor.extract()) {
+            if (value.startsWith(subfield)) {
+                List<Set<String>> candidateLabels = attributeValueStore.getAll(value);
+                CandidateFeatureValues candidateValues = new CandidateFeatureValues(candidateLabels);
+                if (result == null) {
+                    result = candidateValues;
+                } else {
+                    result = candidateValues.merge(result, attributeExtractor.getLogicalOperator());
+                }
+            }
+        }
+        return result;
+    }
+}