-
Notifications
You must be signed in to change notification settings - Fork 2.3k
[Rule-based Auto-tagging] Add autotagging label resolving logic for multiple attributes #19424
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,22 @@ | |
| * @param <V> | ||
| */ | ||
| public interface AttributeExtractor<V> { | ||
|
|
||
| /** | ||
| * Defines the combination style used when a request contains multiple values | ||
| * for an attribute. | ||
| */ | ||
| enum LogicalOperator { | ||
| /** | ||
| * Logical AND | ||
| */ | ||
| AND, | ||
| /** | ||
| * Logical OR | ||
| */ | ||
| OR | ||
| } | ||
|
Comment on lines
+23
to
+32
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we expecting anything other than AND/OR? If not, might be better to have method return boolean value, say
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that would not be ideal, since the return value might be a little ambiguous in the instructions after the method call, e.g.: |
||
|
|
||
| /** | ||
| * This method returns the Attribute which it is responsible for extracting | ||
| * @return attribute | ||
|
|
@@ -26,4 +42,13 @@ public interface AttributeExtractor<V> { | |
| * @return attribute value | ||
| */ | ||
| Iterable<V> extract(); | ||
|
|
||
| /** | ||
| * Returns the logical operator used when a request contains multiple values | ||
| * for an attribute. | ||
| * For example, if the request targets both index A and B, then a rule must | ||
| * have both index A and B as attributes, requiring an AND operator. | ||
| * @return the logical operator (e.g., AND, OR) | ||
| */ | ||
| LogicalOperator getLogicalOperator(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| /* | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| * | ||
| * The OpenSearch Contributors require contributions made to | ||
| * this file be licensed under the Apache-2.0 license or a | ||
| * compatible open source license. | ||
| */ | ||
|
|
||
| package org.opensearch.rule.feature_value_resolver; | ||
|
|
||
| import org.opensearch.rule.attribute_extractor.AttributeExtractor; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
| * Represents candidate feature values for an attribute | ||
| */ | ||
| public class CandidateFeatureValues { | ||
|
|
||
| /** | ||
| * A list of sets of candidate feature values collected for an attribute. | ||
| * The list is ordered from the most specific match to less specific ones. For example: | ||
| * featureValuesBySpecificity = [ {"a", "b"}, {"c"} ] | ||
| * Here, {"a", "b"} comes first because these feature values come from rules with a more specific match, | ||
| * e.g. a rule with "username|123" is a more specific match than "username|1" when querying "username|1234". | ||
| */ | ||
| private final List<Set<String>> featureValuesBySpecificity; | ||
|
|
||
| /** | ||
| * A flattened set of all candidate values collected across all specificity levels. | ||
| * This set combines all values in 'featureValuesBySpecificity' into a single collection for easy access | ||
| * and intersection computations. | ||
| */ | ||
| private final Set<String> flattenedValues = new HashSet<>(); | ||
|
|
||
| /** | ||
| * Maps each feature value to the index of the first set in 'featureValuesBySpecificity' that contains it. | ||
| * This helps in tie-breaking: values appearing earlier in the list (i.e., more specific matches) | ||
| * are considered better matches when resolving the final label. | ||
| */ | ||
| private final Map<String, Integer> firstOccurrenceIndex = new HashMap<>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am assuming this is for optimizing the lookup? Have we considered the latency impact without having this index?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We haven’t measured the latency impact or run latency tests yet, but we expect this to make lookups faster. Without it, we would need to iterate through every element in the list to determine the earliest occurrence, which would be far less efficient. |
||
|
|
||
| /** | ||
| * Constructs CandidateFeatureValues initialized with given list of value sets. | ||
| * @param initialValues List of sets of candidate values. | ||
| */ | ||
| public CandidateFeatureValues(List<Set<String>> initialValues) { | ||
| this.featureValuesBySpecificity = new ArrayList<>(initialValues); | ||
| for (int i = 0; i < featureValuesBySpecificity.size(); i++) { | ||
| for (String val : featureValuesBySpecificity.get(i)) { | ||
| flattenedValues.add(val); | ||
| firstOccurrenceIndex.putIfAbsent(val, i); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * flattenedValues getter | ||
| */ | ||
| public Set<String> getFlattenedValues() { | ||
| return flattenedValues; | ||
| } | ||
|
|
||
| /** | ||
| * firstOccurrenceIndex getter | ||
| * @param value | ||
| */ | ||
| public int getFirstOccurrenceIndex(String value) { | ||
| return firstOccurrenceIndex.getOrDefault(value, Integer.MAX_VALUE); | ||
| } | ||
|
|
||
| /** | ||
| * Merges this CandidateFeatureValues with another based on the specified logical operator | ||
| * @param other Other CandidateFeatureValues to merge with. | ||
| * @param logicalOperator Logical operator (AND / OR) for merging. | ||
| */ | ||
| public CandidateFeatureValues merge(CandidateFeatureValues other, AttributeExtractor.LogicalOperator logicalOperator) { | ||
| return switch (logicalOperator) { | ||
| case AND -> mergeAnd(other); | ||
| case OR -> mergeOr(other); | ||
| }; | ||
| } | ||
|
|
||
| private CandidateFeatureValues mergeOr(CandidateFeatureValues other) { | ||
| return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, null); | ||
| } | ||
|
|
||
| private CandidateFeatureValues mergeAnd(CandidateFeatureValues other) { | ||
| Set<String> elementsInThis = this.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet()); | ||
| Set<String> elementsInOther = other.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet()); | ||
|
|
||
| Set<String> common = new HashSet<>(elementsInThis); | ||
| common.retainAll(elementsInOther); | ||
|
|
||
| return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, common); | ||
| } | ||
|
|
||
| private CandidateFeatureValues mergeByIndex(List<Set<String>> list1, List<Set<String>> list2, Set<String> filterElements) { | ||
| List<Set<String>> result = new ArrayList<>(); | ||
| int max = Math.max(list1.size(), list2.size()); | ||
|
|
||
| for (int i = 0; i < max; i++) { | ||
| Set<String> merged = new HashSet<>(); | ||
| if (i < list1.size()) { | ||
| merged.addAll(list1.get(i)); | ||
| } | ||
| if (i < list2.size()) { | ||
| merged.addAll(list2.get(i)); | ||
| } | ||
| if (filterElements != null) { | ||
| merged.retainAll(filterElements); | ||
| } | ||
| if (!merged.isEmpty()) { | ||
| result.add(merged); | ||
| } | ||
| } | ||
| return new CandidateFeatureValues(result); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "(" + "values=" + featureValuesBySpecificity + ')'; | ||
| } | ||
|
|
||
| List<Set<String>> getFeatureValuesBySpecificity() { | ||
| return featureValuesBySpecificity; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /* | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| * | ||
| * The OpenSearch Contributors require contributions made to | ||
| * this file be licensed under the Apache-2.0 license or a | ||
| * compatible open source license. | ||
| */ | ||
|
|
||
| package org.opensearch.rule.feature_value_resolver; | ||
|
|
||
| import org.opensearch.rule.attribute_extractor.AttributeExtractor; | ||
| import org.opensearch.rule.storage.AttributeValueStore; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Set; | ||
|
|
||
| /** | ||
| * Collects candidate feature values for a specified subfield of a given attribute extractor. | ||
| * For example, the "principal" attribute may contain subfields such as "username" and "role": | ||
| * principal: { | ||
| * "username": ["alice", "bob"], | ||
| * "role": ["admin"] | ||
| * } | ||
| * If the attribute does not define any subfields, then the subfield name is represented | ||
| * by an empty string "" | ||
| */ | ||
| public class FeatureValueCollector { | ||
|
|
||
| private final AttributeValueStore<String, String> attributeValueStore; | ||
| private final AttributeExtractor<String> attributeExtractor; | ||
| private final String subfield; | ||
|
|
||
| /** | ||
| * Constructs a FeatureValueCollector with the given store, extractor, and subfield. | ||
| * @param attributeValueStore The store to retrieve candidate feature values from. | ||
| * @param attributeExtractor The extractor to extract attribute values. | ||
| * @param subfield The subfield attribute | ||
| */ | ||
| public FeatureValueCollector( | ||
| AttributeValueStore<String, String> attributeValueStore, | ||
| AttributeExtractor<String> attributeExtractor, | ||
| String subfield | ||
| ) { | ||
| this.attributeValueStore = attributeValueStore; | ||
| this.attributeExtractor = attributeExtractor; | ||
| this.subfield = subfield; | ||
| } | ||
|
|
||
| /** | ||
| * Collects feature values for the subfield from the attribute extractor. | ||
| */ | ||
| public CandidateFeatureValues collect() { | ||
| CandidateFeatureValues result = null; | ||
| for (String value : attributeExtractor.extract()) { | ||
| if (value.startsWith(subfield)) { | ||
| List<Set<String>> candidateLabels = attributeValueStore.getAll(value); | ||
| CandidateFeatureValues candidateValues = new CandidateFeatureValues(candidateLabels); | ||
| if (result == null) { | ||
| result = candidateValues; | ||
| } else { | ||
| result = candidateValues.merge(result, attributeExtractor.getLogicalOperator()); | ||
| } | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.