Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add a dynamic setting to change skip_cache_factor and min_frequency for querycache ([#18351](https://github.com/opensearch-project/OpenSearch/issues/18351))
- Add overload constructor for Translog to accept Channel Factory as a parameter ([#18918](https://github.com/opensearch-project/OpenSearch/pull/18918))
- Add subdirectory-aware store module with recovery support ([#19132](https://github.com/opensearch-project/OpenSearch/pull/19132))
- [Rule-based Auto-tagging] Add autotagging label resolving logic for multiple attributes ([#19424](https://github.com/opensearch-project/OpenSearch/pull/19424))
- Field collapsing supports search_after ([#19261](https://github.com/opensearch-project/OpenSearch/pull/19261))
- Add a dynamic cluster setting to control the enablement of the merged segment warmer ([#18929](https://github.com/opensearch-project/OpenSearch/pull/18929))
- Publish transport-grpc-spi exposing QueryBuilderProtoConverter and QueryBuilderProtoConverterRegistry ([#18949](https://github.com/opensearch-project/OpenSearch/pull/18949))
Expand Down
1 change: 1 addition & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ commonscodec = "1.18.0"
commonslang = "3.18.0"
commonscompress = "1.28.0"
commonsio = "2.16.0"
commonscollections4 = "4.5.0"
# plugin dependencies
aws = "2.32.29"
awscrt = "0.35.0"
Expand Down
2 changes: 1 addition & 1 deletion modules/autotagging-commons/common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ apply plugin: 'opensearch.publish'
description = 'OpenSearch Rule framework common constructs which spi and module shares'

dependencies {
api 'org.apache.commons:commons-collections4:4.4'
api "org.apache.commons:commons-collections4:${versions.commonscollections4}"
implementation project(":libs:opensearch-common")
compileOnly project(":server")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@
* @param <V>
*/
public interface AttributeExtractor<V> {

/**
* Defines the combination style used when a request contains multiple values
* for an attribute.
*/
enum LogicalOperator {
/**
* Logical AND
*/
AND,
/**
* Logical OR
*/
OR
}
Comment on lines +23 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we expecting anything other than AND/OR? If not, might be better to have method return boolean value, say isConjunction()?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that will not be ideal since the return value might be a little ambiguous in instructions after the method call, e.g.:

boolean isAnd = isConjuntion();
....

if (!isAnd) // this is ambiguous as this doesn't directly imply OR here vs LogicalOperator.OR 


/**
* This method returns the Attribute which it is responsible for extracting
* @return attribute
Expand All @@ -26,4 +42,13 @@ public interface AttributeExtractor<V> {
* @return attribute value
*/
Iterable<V> extract();

/**
* Returns the logical operator used when a request contains multiple values
* for an attribute.
* For example, if the request targets both index A and B, then a rule must
* have both index A and B as attributes, requiring an AND operator.
* @return the logical operator (e.g., AND, OR)
*/
LogicalOperator getLogicalOperator();
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import org.opensearch.rule.attribute_extractor.AttributeExtractor;
import org.opensearch.rule.autotagging.Attribute;
import org.opensearch.rule.autotagging.Rule;
import org.opensearch.rule.feature_value_resolver.FeatureValueResolver;
import org.opensearch.rule.storage.AttributeValueStore;
import org.opensearch.rule.storage.AttributeValueStoreFactory;

import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand All @@ -32,13 +34,23 @@ public class InMemoryRuleProcessingService {
*/
public static final String WILDCARD = "*";
private final AttributeValueStoreFactory attributeValueStoreFactory;
/**
* Map of prioritized attributes
*/
private final Map<Attribute, Integer> prioritizedAttributes;

/**
* Constructor
* @param attributeValueStoreFactory
* Constructs an InMemoryRuleProcessingService with the given
* attribute value store factory and a prioritized list of attributes.
* @param attributeValueStoreFactory Factory to create attribute value stores.
* @param prioritizedAttributes Map of prioritized attributes
*/
public InMemoryRuleProcessingService(AttributeValueStoreFactory attributeValueStoreFactory) {
public InMemoryRuleProcessingService(
AttributeValueStoreFactory attributeValueStoreFactory,
Map<Attribute, Integer> prioritizedAttributes
) {
this.attributeValueStoreFactory = attributeValueStoreFactory;
this.prioritizedAttributes = prioritizedAttributes;
}

/**
Expand All @@ -58,8 +70,14 @@ public void remove(final Rule rule) {
}

private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Rule> ruleOperation) {
for (Map.Entry<Attribute, Set<String>> attributeEntry : rule.getAttributeMap().entrySet()) {
ruleOperation.accept(attributeEntry, rule);
for (Attribute attribute : rule.getFeatureType().getAllowedAttributesRegistry().values()) {
Set<String> attributeValues;
if (rule.getAttributeMap().containsKey(attribute)) {
attributeValues = rule.getAttributeMap().get(attribute);
} else {
attributeValues = Set.of("");
}
ruleOperation.accept(Map.entry(attribute, attributeValues), rule);
}
}

Expand All @@ -78,37 +96,14 @@ private void addOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule
}

/**
* Evaluates the label for the current request. It finds the matches for each attribute value and then it is an
* intersection of all the matches
* @param attributeExtractors list of extractors which are used to get the attribute values to find the
* matching rule
* @return a label if there is unique label otherwise empty
* Determines the final feature value for the given request
* @param attributeExtractors list of attribute extractors
*/
public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attributeExtractors) {
assert attributeValueStoreFactory != null;
Optional<String> result = Optional.empty();
for (AttributeExtractor<String> attributeExtractor : attributeExtractors) {
AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(
attributeExtractor.getAttribute()
);
for (String value : attributeExtractor.extract()) {
List<Set<String>> candidateMatches = valueStore.getAll(value);

if (candidateMatches == null || candidateMatches.isEmpty()) {
return Optional.empty();
}

Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
if (result.isEmpty()) {
result = possibleMatch;
} else {
boolean isThePossibleMatchEqualResult = possibleMatch.get().equals(result.get());
if (!isThePossibleMatchEqualResult) {
return Optional.empty();
}
}
}
}
return result;
attributeExtractors.sort(
Comparator.comparingInt(extractor -> prioritizedAttributes.getOrDefault(extractor.getAttribute(), Integer.MAX_VALUE))
);
FeatureValueResolver featureValueResolver = new FeatureValueResolver(attributeValueStoreFactory);
return featureValueResolver.resolve(attributeExtractors).resolveLabel();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.rule.feature_value_resolver;

import org.opensearch.rule.attribute_extractor.AttributeExtractor;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Represents candidate feature values for an attribute
*/
public class CandidateFeatureValues {

/**
* A list of sets of candidate feature values collected for an attribute
* The list is ordered from the most specific match to less specific ones. For example:
* featureValues = [ {"a", "b"}, {"c"} ]
* Here, {"a", "b"} comes first because these feature values comes from rules with a more specific match
* e.g. A rule with "username|123" is a more specific match than "username|1" when querying "username|1234".
*/
private final List<Set<String>> featureValuesBySpecificity;

/**
* A flattened set of all candidate values collected across all specificity levels.
* This set combines all values in 'featureValues' into a single collection for easy access
* and intersection computations.
*/
private final Set<String> flattenedValues = new HashSet<>();

/**
* Maps each feature value to the index of its first occurrence set in 'featureValues'.
* This helps in tie-breaking: values appearing earlier in the list (i.e., more specific matches)
* are considered better matches when resolving the final label.
*/
private final Map<String, Integer> firstOccurrenceIndex = new HashMap<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am assuming this is for optimizing the lookup? Have we considered the latency impact without having this index?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We haven’t measured the latency impact/ run latency tests yet, but expect this should make lookups faster. Without it, we would need to iterate through every element in the list to determine the earliest occurrence, which would be way less efficient.


/**
* Constructs CandidateFeatureValues initialized with given list of value sets.
* @param initialValues List of sets of candidate values.
*/
public CandidateFeatureValues(List<Set<String>> initialValues) {
this.featureValuesBySpecificity = new ArrayList<>(initialValues);
for (int i = 0; i < featureValuesBySpecificity.size(); i++) {
for (String val : featureValuesBySpecificity.get(i)) {
flattenedValues.add(val);
firstOccurrenceIndex.putIfAbsent(val, i);
}
}
}

/**
* flattenedValues getter
*/
public Set<String> getFlattenedValues() {
return flattenedValues;
}

/**
* firstOccurrenceIndex getter
* @param value
*/
public int getFirstOccurrenceIndex(String value) {
return firstOccurrenceIndex.getOrDefault(value, Integer.MAX_VALUE);
}

/**
* Merges this CandidateFeatureValues with another based on the specified logical operator
* @param other Other CandidateFeatureValues to merge with.
* @param logicalOperator Logical operator (AND / OR) for merging.
*/
public CandidateFeatureValues merge(CandidateFeatureValues other, AttributeExtractor.LogicalOperator logicalOperator) {
return switch (logicalOperator) {
case AND -> mergeAnd(other);
case OR -> mergeOr(other);
};
}

private CandidateFeatureValues mergeOr(CandidateFeatureValues other) {
return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, null);
}

private CandidateFeatureValues mergeAnd(CandidateFeatureValues other) {
Set<String> elementsInThis = this.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet());
Set<String> elementsInOther = other.featureValuesBySpecificity.stream().flatMap(Set::stream).collect(Collectors.toSet());

Set<String> common = new HashSet<>(elementsInThis);
common.retainAll(elementsInOther);

return mergeByIndex(this.featureValuesBySpecificity, other.featureValuesBySpecificity, common);
}

private CandidateFeatureValues mergeByIndex(List<Set<String>> list1, List<Set<String>> list2, Set<String> filterElements) {
List<Set<String>> result = new ArrayList<>();
int max = Math.max(list1.size(), list2.size());

for (int i = 0; i < max; i++) {
Set<String> merged = new HashSet<>();
if (i < list1.size()) {
merged.addAll(list1.get(i));
}
if (i < list2.size()) {
merged.addAll(list2.get(i));
}
if (filterElements != null) {
merged.retainAll(filterElements);
}
if (!merged.isEmpty()) {
result.add(merged);
}
}
return new CandidateFeatureValues(result);
}

@Override
public String toString() {
return "(" + "values=" + featureValuesBySpecificity + ')';
}

List<Set<String>> getFeatureValuesBySpecificity() {
return featureValuesBySpecificity;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.rule.feature_value_resolver;

import org.opensearch.rule.attribute_extractor.AttributeExtractor;
import org.opensearch.rule.storage.AttributeValueStore;

import java.util.List;
import java.util.Set;

/**
* Collects candidate feature values for a specified subfield of a given attribute extractor.
* For example, the "principal" attribute may contain subfields such as "username" and "role":
* principal: {
* "username": ["alice", "bob"],
* "role": ["admin"]
* }
* If the attribute does not define any subfields, then the subfield name is represented
* by an empty string ""
*/
public class FeatureValueCollector {

private final AttributeValueStore<String, String> attributeValueStore;
private final AttributeExtractor<String> attributeExtractor;
private final String subfield;

/**
* Constructs a FeatureValueCollector with the given store, extractor, and subfield.
* @param attributeValueStore The store to retrieve candidate feature values from.
* @param attributeExtractor The extractor to extract attribute values.
* @param subfield The subfield attribute
*/
public FeatureValueCollector(
AttributeValueStore<String, String> attributeValueStore,
AttributeExtractor<String> attributeExtractor,
String subfield
) {
this.attributeValueStore = attributeValueStore;
this.attributeExtractor = attributeExtractor;
this.subfield = subfield;
}

/**
* Collects feature values for the subfield from the attribute extractor.
*/
public CandidateFeatureValues collect() {
CandidateFeatureValues result = null;
for (String value : attributeExtractor.extract()) {
if (value.startsWith(subfield)) {
List<Set<String>> candidateLabels = attributeValueStore.getAll(value);
CandidateFeatureValues candidateValues = new CandidateFeatureValues(candidateLabels);
if (result == null) {
result = candidateValues;
} else {
result = candidateValues.merge(result, attributeExtractor.getLogicalOperator());
}
}
}
return result;
}
}
Loading
Loading