Skip to content

Commit 8f59bf2

Browse files
authored
[Rule-based Auto-tagging] Restructure trie to store values as a set (#19344)
Signed-off-by: Ruirui Zhang <[email protected]>
1 parent bb5f0e8 commit 8f59bf2

File tree

5 files changed

+108
-51
lines changed

5 files changed

+108
-51
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
1010
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
1111
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065) ([#19097](https://github.com/opensearch-project/OpenSearch/pull/19097)))
12+
- [Rule-based Auto-tagging] restructure the in-memory trie to store values as a set ([#19344](https://github.com/opensearch-project/OpenSearch/pull/19344))
1213
- Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
1314
- Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))
1415
- Use S3CrtClient for higher throughput while uploading files to S3 ([#18800](https://github.com/opensearch-project/OpenSearch/pull/18800))

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
package org.opensearch.rule.storage;
1010

11+
import java.util.ArrayList;
12+
import java.util.List;
1113
import java.util.Optional;
14+
import java.util.Set;
1215

1316
/**
1417
* This interface provides apis to store Rule attribute values
@@ -21,16 +24,32 @@ public interface AttributeValueStore<K, V> {
2124
*/
2225
void put(K key, V value);
2326

27+
/**
28+
* removes the key and associated value from attribute value store
29+
* @param key key of the value to be removed
30+
* @param value to be removed
31+
*/
32+
default void remove(K key, V value) {
33+
remove(key);
34+
}
35+
2436
/**
2537
* removes the key and associated value from attribute value store
2638
* @param key to be removed
2739
*/
2840
void remove(K key);
2941

42+
/**
43+
* Returns the values associated with the key
44+
* @param key in the data structure
45+
*/
46+
default List<Set<V>> getAll(K key) {
47+
return new ArrayList<>();
48+
}
49+
3050
/**
3151
* Returns the value associated with the key
3252
* @param key in the data structure
33-
* @return
3453
*/
3554
Optional<V> get(K key);
3655

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010

1111
import org.apache.commons.collections4.trie.PatriciaTrie;
1212

13-
import java.util.Map;
13+
import java.util.ArrayList;
14+
import java.util.HashSet;
15+
import java.util.List;
1416
import java.util.Optional;
17+
import java.util.Set;
1518
import java.util.concurrent.locks.ReentrantReadWriteLock;
1619

1720
/**
@@ -21,7 +24,7 @@
2124
* ref: https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/org/apache/commons/collections4/trie/PatriciaTrie.html
2225
*/
2326
public class DefaultAttributeValueStore<K extends String, V> implements AttributeValueStore<K, V> {
24-
private final PatriciaTrie<V> trie;
27+
private final PatriciaTrie<Set<V>> trie;
2528
private static final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
2629
private static final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
2730
private static final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
@@ -37,74 +40,65 @@ public DefaultAttributeValueStore() {
3740
* Main constructor
3841
* @param trie A Patricia Trie
3942
*/
40-
public DefaultAttributeValueStore(PatriciaTrie<V> trie) {
43+
public DefaultAttributeValueStore(PatriciaTrie<Set<V>> trie) {
4144
this.trie = trie;
4245
}
4346

4447
@Override
4548
public void put(K key, V value) {
4649
writeLock.lock();
4750
try {
48-
trie.put(key, value);
51+
trie.computeIfAbsent(key, k -> new HashSet<>()).add(value);
4952
} finally {
5053
writeLock.unlock();
5154
}
5255
}
5356

5457
@Override
55-
public void remove(String key) {
58+
public void remove(K key, V value) {
5659
writeLock.lock();
5760
try {
58-
trie.remove(key);
61+
trie.computeIfPresent(key, (k, values) -> {
62+
values.remove(value);
63+
return values.isEmpty() ? null : values;
64+
});
5965
} finally {
6066
writeLock.unlock();
6167
}
6268
}
6369

6470
@Override
65-
public Optional<V> get(String key) {
71+
public void remove(K key) {
72+
throw new UnsupportedOperationException("This remove(K key) function is not supported within DefaultAttributeValueStore.");
73+
}
74+
75+
@Override
76+
public Optional<V> get(K key) {
77+
throw new UnsupportedOperationException("This get(K key) function is not supported within DefaultAttributeValueStore.");
78+
}
79+
80+
@Override
81+
public List<Set<V>> getAll(String key) {
6682
readLock.lock();
6783
try {
68-
/**
69-
* Since we are inserting prefixes into the trie and searching for larger strings
70-
* It is important to find the largest matching prefix key in the trie efficiently
71-
* Hence we can do binary search
72-
*/
73-
final String longestMatchingPrefix = findLongestMatchingPrefix(key);
84+
List<Set<V>> results = new ArrayList<>();
85+
StringBuilder prefixBuilder = new StringBuilder(key);
7486

75-
/**
76-
* Now there are following cases for this prefix
77-
* 1. There is a Rule which has this prefix as one of the attribute values. In this case we should return the
78-
* Rule's label otherwise send empty
79-
*/
80-
for (Map.Entry<String, V> possibleMatch : trie.prefixMap(longestMatchingPrefix).entrySet()) {
81-
if (key.startsWith(possibleMatch.getKey())) {
82-
return Optional.of(possibleMatch.getValue());
87+
for (int i = key.length(); i >= 0; i--) {
88+
String prefix = prefixBuilder.toString();
89+
Set<V> value = trie.get(prefix);
90+
if (value != null && !value.isEmpty()) {
91+
results.add(value);
92+
}
93+
if (!prefixBuilder.isEmpty()) {
94+
prefixBuilder.deleteCharAt(prefixBuilder.length() - 1);
8395
}
8496
}
97+
98+
return results;
8599
} finally {
86100
readLock.unlock();
87101
}
88-
return Optional.empty();
89-
}
90-
91-
private String findLongestMatchingPrefix(String key) {
92-
int low = 0;
93-
int high = key.length() - 1;
94-
95-
while (low < high) {
96-
int mid = (high + low + 1) / 2;
97-
/**
98-
* This operation has O(1) complexity because prefixMap returns only the iterator
99-
*/
100-
if (!trie.prefixMap(key.substring(0, mid)).isEmpty()) {
101-
low = mid;
102-
} else {
103-
high = mid - 1;
104-
}
105-
}
106-
107-
return key.substring(0, low);
108102
}
109103

110104
@Override

modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
import java.util.ArrayList;
1515
import java.util.List;
16+
import java.util.Optional;
17+
import java.util.Set;
1618

1719
public class AttributeValueStoreTests extends OpenSearchTestCase {
1820

@@ -26,27 +28,33 @@ public void setUp() throws Exception {
2628

2729
public void testPut() {
2830
subjectUnderTest.put("foo", "bar");
29-
assertEquals("bar", subjectUnderTest.get("foo").get());
31+
assertEquals("bar", subjectUnderTest.getAll("foo").getFirst().iterator().next());
32+
subjectUnderTest.put("foo", "sing");
33+
assertEquals(1, subjectUnderTest.getAll("foo").size());
34+
assertEquals(2, subjectUnderTest.getAll("foo").get(0).size());
35+
assertTrue(subjectUnderTest.getAll("foo").get(0).contains("sing"));
3036
}
3137

3238
public void testRemove() {
3339
subjectUnderTest.put("foo", "bar");
34-
subjectUnderTest.remove("foo");
40+
subjectUnderTest.remove("foo", "bar");
3541
assertEquals(0, subjectUnderTest.size());
3642
}
3743

3844
public void tesGet() {
3945
subjectUnderTest.put("foo", "bar");
40-
assertEquals("bar", subjectUnderTest.get("foo").get());
46+
assertEquals("bar", subjectUnderTest.getAll("foo").getFirst());
47+
subjectUnderTest.put("foo", "sing");
48+
assertEquals(2, subjectUnderTest.getAll("foo").size());
4149
}
4250

4351
public void testGetWhenNoProperPrefixIsPresent() {
4452
subjectUnderTest.put("foo", "bar");
4553
subjectUnderTest.put("foodip", "sing");
46-
assertTrue(subjectUnderTest.get("foxtail").isEmpty());
54+
assertTrue(subjectUnderTest.getAll("foxtail").isEmpty());
4755
subjectUnderTest.put("fox", "lucy");
4856

49-
assertFalse(subjectUnderTest.get("foxtail").isEmpty());
57+
assertFalse(subjectUnderTest.getAll("foxtail").isEmpty());
5058
}
5159

5260
public void testClear() {
@@ -97,7 +105,7 @@ public void run() {
97105
try {
98106
Thread.sleep(random().nextInt(100));
99107
for (String key : toReadKeys) {
100-
subjectUnderTest.get(key);
108+
subjectUnderTest.getAll(key);
101109
}
102110
} catch (InterruptedException e) {}
103111
}
@@ -123,4 +131,38 @@ public void run() {
123131
} catch (InterruptedException e) {}
124132
}
125133
}
134+
135+
public void testDefaultMethods() {
136+
class DummyStore implements AttributeValueStore<String, String> {
137+
boolean removeCalled = false;
138+
139+
@Override
140+
public void put(String key, String value) {}
141+
142+
@Override
143+
public void remove(String key) {
144+
removeCalled = true;
145+
}
146+
147+
@Override
148+
public Optional<String> get(String key) {
149+
return Optional.empty();
150+
}
151+
152+
@Override
153+
public void clear() {}
154+
155+
@Override
156+
public int size() {
157+
return 0;
158+
}
159+
}
160+
161+
DummyStore store = new DummyStore();
162+
store.remove("foo", "bar");
163+
assertTrue(store.removeCalled);
164+
List<Set<String>> result = store.getAll("foo");
165+
assertNotNull(result);
166+
assertTrue(result.isEmpty());
167+
}
126168
}

modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Ru
6666
private void removeOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule rule) {
6767
AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(attributeEntry.getKey());
6868
for (String value : attributeEntry.getValue()) {
69-
valueStore.remove(value.replace(WILDCARD, ""));
69+
valueStore.remove(value.replace(WILDCARD, ""), rule.getFeatureValue());
7070
}
7171
}
7272

@@ -92,12 +92,13 @@ public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attribute
9292
attributeExtractor.getAttribute()
9393
);
9494
for (String value : attributeExtractor.extract()) {
95-
Optional<String> possibleMatch = valueStore.get(value);
95+
List<Set<String>> candidateMatches = valueStore.getAll(value);
9696

97-
if (possibleMatch.isEmpty()) {
97+
if (candidateMatches == null || candidateMatches.isEmpty()) {
9898
return Optional.empty();
9999
}
100100

101+
Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
101102
if (result.isEmpty()) {
102103
result = possibleMatch;
103104
} else {

0 commit comments

Comments
 (0)