Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
469cf51
Combining filter rewrite and skip list approaches for further optimiz…
jainankitk Oct 8, 2025
e20f702
Removing parent aggregation check for perf benchmark
jainankitk Oct 8, 2025
82bc95d
Adding changelog entry
jainankitk Oct 8, 2025
aff3dc6
Applying the skip list optimization for AutoDateHistogram
jainankitk Oct 9, 2025
1c29540
Addressing checkstyle failures
jainankitk Oct 9, 2025
b9e9f2b
Apply spotless
jainankitk Oct 9, 2025
a28b9c1
Merge branch 'main' into agg-perf
jainankitk Oct 9, 2025
0a9ef40
Minor bug fix
jainankitk Oct 10, 2025
8d4ccf7
Merge branch 'main' into agg-perf
jainankitk Oct 21, 2025
4e7d9e6
Merge branch 'main' into agg-perf
jainankitk Oct 23, 2025
23fbad3
Add unit test for filter rewrite with date histogram with skiplist.
asimmahmood1 Oct 27, 2025
7eb64f7
Spotless check
asimmahmood1 Oct 27, 2025
35834e4
Fix unit test
asimmahmood1 Oct 27, 2025
2b593c9
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Oct 27, 2025
3cdc37d
Not ready for check-in, just throwing this out to come up with differ…
asimmahmood1 Nov 10, 2025
d0eeb37
Revert auto date changes for this PR
asimmahmood1 Nov 10, 2025
66ffef1
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 10, 2025
0ec357a
Switch to Lucene's version of BitSetDocIdStream
asimmahmood1 Nov 12, 2025
7a7209f
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 12, 2025
37f4641
Merge branch 'main' into agg-perf
jainankitk Nov 14, 2025
eaf7e52
Resolving merge conflict issue
jainankitk Nov 14, 2025
7c05efe
Fixing build failure
jainankitk Nov 14, 2025
1d97bee
Merge branch 'main' into agg-perf
jainankitk Nov 14, 2025
d8448f5
Merge branch 'main' into agg-perf
jainankitk Nov 15, 2025
6888b6c
This is more concise method I can of. It doesn't guarentee only SugAg…
asimmahmood1 Nov 17, 2025
7bb162e
Fix unit test
asimmahmood1 Nov 17, 2025
d961a87
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 17, 2025
d208ed7
Fix unit test
asimmahmood1 Nov 19, 2025
27f8248
Initial work to get auto date histogram to use skiplist
asimmahmood1 Nov 19, 2025
887799d
Fixed rounding logic in FromSingle. Added FromMany logic when called …
asimmahmood1 Nov 24, 2025
4d7f22d
Javadoc
asimmahmood1 Nov 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support pull-based ingestion message mappers and raw payload support ([#19765](https://github.com/opensearch-project/OpenSearch/pull/19765))

### Changed
- Combining filter rewrite and skip list to optimize sub aggregation ([#19573](https://github.com/opensearch-project/OpenSearch/pull/19573))
- Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350))
- Refactor to move prepareIndex and prepareDelete methods to Engine class ([#19551](https://github.com/opensearch-project/OpenSearch/pull/19551))
- Omit maxScoreCollector in SimpleTopDocsCollectorContext when concurrent segment search enabled ([#19584](https://github.com/opensearch-project/OpenSearch/pull/19584))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ public abstract class AggregatorBase extends Aggregator {
private Map<String, Aggregator> subAggregatorbyName;
private final CircuitBreakerService breakerService;
private long requestBytesUsed;
boolean precomputePath = false;

/**
* Constructs a new Aggregator.
Expand Down Expand Up @@ -202,8 +203,13 @@ public Map<String, Object> metadata() {

@Override
public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
if (tryPrecomputeAggregationForLeaf(ctx)) {
throw new CollectionTerminatedException();
try {
precomputePath = true;
if (tryPrecomputeAggregationForLeaf(ctx)) {
throw new CollectionTerminatedException();
}
} finally {
precomputePath = false;
}
preGetSubLeafCollectors(ctx);
final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
Expand Down Expand Up @@ -236,6 +242,29 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws
return false;
}

/**
 * Reports whether a precompute attempt is in progress for this aggregator or for any
 * aggregator above it in the parent chain.
 *
 * @return {@code true} if the {@code precomputePath} flag is set on this aggregator or on any
 *         ancestor that is an {@link AggregatorBase}; {@code false} otherwise
 */
protected boolean isTryPrecomputePath() {
    if (precomputePath) {
        return true;
    }
    // Walk up the parent chain; ancestors that are not AggregatorBase carry no flag and are skipped.
    for (Aggregator ancestor = parent; ancestor != null; ancestor = ancestor.parent()) {
        if (ancestor instanceof AggregatorBase base && base.precomputePath) {
            return true;
        }
    }
    return false;
}

@Override
public final void preCollection() throws IOException {
List<BucketCollector> collectors = Arrays.asList(subAggregators);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.aggregations.bucket;

import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdStream;
import org.apache.lucene.search.Scorable;
import org.opensearch.common.Rounding;
import org.opensearch.search.aggregations.LeafBucketCollector;
import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;

import java.io.IOException;
import java.util.function.LongFunction;
import java.util.function.Supplier;

/**
 * Histogram collection logic using skip list.
 *
 * <p>The collector consults a {@link DocValuesSkipper} to find ranges of doc IDs in which every
 * document has a value and all values round to the same histogram bucket. Documents inside such
 * a range are counted without reading their individual values; all other documents are rounded
 * one by one.
 *
 * <p>Currently, it can only handle one owningBucketOrd at a time: the cached range belongs to
 * the owning bucket ordinal (and rounding) it was computed for, and is discarded as soon as a
 * call arrives with a different ordinal or a different rounding instance.
 *
 * @opensearch.internal
 */
public class HistogramSkiplistLeafCollector extends LeafBucketCollector {

    private final NumericDocValues values;
    private final DocValuesSkipper skipper;
    private final LeafBucketCollector sub;
    private final BucketsAggregator aggregator;

    /**
     * Function returning the current prepared rounding for an owning bucket ordinal.
     * This allows detection of rounding changes in AutoDateHistogramAggregator.
     */
    private final LongFunction<Rounding.Prepared> preparedRoundingSupplier;

    /**
     * Supplies the bucket ordinals to add to; looked up on every use rather than cached.
     */
    private final Supplier<LongKeyedBucketOrds> bucketOrdsSupplier;

    /**
     * Callback invoked after {@link #collect(int, long)} has created a new bucket.
     */
    private final IncreaseRoundingIfNeeded increaseRoundingIfNeeded;

    /**
     * Max doc ID (inclusive) up to which all docs values may map to the same
     * bucket.
     */
    private int upToInclusive = -1;

    /**
     * Whether all docs up to {@link #upToInclusive} values map to the same bucket.
     */
    private boolean upToSameBucket;

    /**
     * Index in bucketOrds for docs up to {@link #upToInclusive}.
     */
    private long upToBucketIndex;

    /**
     * Tracks the last preparedRounding reference to detect rounding changes.
     * Used for cache invalidation when AutoDateHistogramAggregator changes rounding.
     */
    private Rounding.Prepared lastPreparedRounding;

    /**
     * Owning bucket ordinal seen by the last staleness check. {@link #upToBucketIndex} is only
     * meaningful for this ordinal. The initial value is irrelevant in practice because
     * {@link #lastPreparedRounding} starts out {@code null}, which forces a reset on first use.
     */
    private long lastOwningBucketOrd = -1;

    /**
     * Creates a collector with a fixed rounding and fixed bucket ordinals.
     *
     * @param values           doc values of the histogram field for the current leaf
     * @param skipper          skip index over the same field
     * @param preparedRounding rounding applied to every value, for every owning bucket ordinal
     * @param bucketOrds       bucket ordinals that rounded keys are added to
     * @param sub              collector for sub-aggregations; {@code null} is treated as
     *                         {@code NO_OP_COLLECTOR}
     * @param aggregator       aggregator whose doc counts are updated
     */
    public HistogramSkiplistLeafCollector(
        NumericDocValues values,
        DocValuesSkipper skipper,
        Rounding.Prepared preparedRounding,
        LongKeyedBucketOrds bucketOrds,
        LeafBucketCollector sub,
        BucketsAggregator aggregator
    ) {
        this(values, skipper, owningBucketOrd -> preparedRounding, () -> bucketOrds, sub, aggregator, (owningBucketOrd, rounded) -> {});
    }

    /**
     * Constructor that accepts a supplier for dynamic rounding (used by AutoDateHistogramAggregator).
     *
     * @param values                   doc values of the histogram field for the current leaf
     * @param skipper                  skip index over the same field
     * @param preparedRoundingSupplier returns the current rounding for an owning bucket ordinal
     * @param bucketOrdsSupplier       returns the current bucket ordinals
     * @param sub                      collector for sub-aggregations; {@code null} is treated as
     *                                 {@code NO_OP_COLLECTOR}
     * @param aggregator               aggregator whose doc counts are updated
     * @param increaseRoundingIfNeeded callback invoked after a new bucket has been created by
     *                                 {@link #collect(int, long)}
     */
    public HistogramSkiplistLeafCollector(
        NumericDocValues values,
        DocValuesSkipper skipper,
        LongFunction<Rounding.Prepared> preparedRoundingSupplier,
        Supplier<LongKeyedBucketOrds> bucketOrdsSupplier,
        LeafBucketCollector sub,
        BucketsAggregator aggregator,
        IncreaseRoundingIfNeeded increaseRoundingIfNeeded
    ) {
        this.values = values;
        this.skipper = skipper;
        this.preparedRoundingSupplier = preparedRoundingSupplier;
        this.bucketOrdsSupplier = bucketOrdsSupplier;
        // Normalise once so that every collect path can dereference sub without a null check.
        this.sub = sub == null ? NO_OP_COLLECTOR : sub;
        this.aggregator = aggregator;
        this.increaseRoundingIfNeeded = increaseRoundingIfNeeded;
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
        sub.setScorer(scorer);
    }

    /**
     * Fetches the current rounding for {@code owningBucketOrd} and drops the cached skip-list
     * range if it was computed for a different rounding instance or a different owning bucket
     * ordinal.
     *
     * @param owningBucketOrd ordinal of the owning bucket for the upcoming collection
     * @return the current prepared rounding for {@code owningBucketOrd}
     */
    private Rounding.Prepared resetCacheIfStale(long owningBucketOrd) {
        final Rounding.Prepared currentRounding = preparedRoundingSupplier.apply(owningBucketOrd);

        // Reference comparison on purpose: AutoDateHistogramAggregator creates a new
        // Rounding.Prepared instance when rounding changes.
        if (currentRounding != lastPreparedRounding || owningBucketOrd != lastOwningBucketOrd) {
            upToInclusive = -1; // Invalidate cache
            upToSameBucket = false;
            lastPreparedRounding = currentRounding;
            lastOwningBucketOrd = owningBucketOrd;
        }
        return currentRounding;
    }

    /**
     * Positions the skipper on the interval containing {@code doc} (if it is not already there)
     * and recomputes {@link #upToInclusive}, {@link #upToSameBucket} and {@link #upToBucketIndex}.
     *
     * @param doc             doc ID that is about to be collected
     * @param owningBucketOrd ordinal of the owning bucket the cached bucket index is computed for
     */
    private void advanceSkipper(int doc, long owningBucketOrd) throws IOException {
        if (doc > skipper.maxDocID(0)) {
            skipper.advance(doc);
        }
        upToSameBucket = false;

        if (skipper.minDocID(0) > doc) {
            // Corner case which happens if `doc` doesn't have a value and is between two
            // intervals of the doc-value skip index.
            upToInclusive = skipper.minDocID(0) - 1;
            return;
        }

        upToInclusive = skipper.maxDocID(0);

        // Get current rounding from supplier
        final Rounding.Prepared currentRounding = preparedRoundingSupplier.apply(owningBucketOrd);

        // Now find the highest level where all docs map to the same bucket.
        for (int level = 0; level < skipper.numLevels(); ++level) {
            final int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
            final long minBucket = currentRounding.round(skipper.minValue(level));
            final long maxBucket = currentRounding.round(skipper.maxValue(level));

            if (skipper.docCount(level) != totalDocsAtLevel || minBucket != maxBucket) {
                break;
            }

            // All docs at this level have a value, and all values map to the same bucket.
            upToInclusive = skipper.maxDocID(level);
            upToSameBucket = true;
            // NOTE(review): this add() may create a new bucket without invoking
            // increaseRoundingIfNeeded, unlike collect(int, long) - confirm this is intended.
            upToBucketIndex = bucketOrdsSupplier.get().add(owningBucketOrd, maxBucket);
            if (upToBucketIndex < 0) {
                // A negative result is decoded as (-1 - index), same as in collect(int, long).
                upToBucketIndex = -1 - upToBucketIndex;
            }
        }
    }

    /**
     * Collects a single document into the histogram bucket for {@code owningBucketOrd}.
     *
     * @param doc             doc ID to collect
     * @param owningBucketOrd ordinal of the owning bucket
     */
    @Override
    public void collect(int doc, long owningBucketOrd) throws IOException {
        final Rounding.Prepared currentRounding = resetCacheIfStale(owningBucketOrd);

        if (doc > upToInclusive) {
            advanceSkipper(doc, owningBucketOrd);
        }

        if (upToSameBucket) {
            // Fast path: the skip index already told us which bucket this doc belongs to.
            aggregator.incrementBucketDocCount(upToBucketIndex, 1L);
            sub.collect(doc, upToBucketIndex);
            return;
        }

        if (values.advanceExact(doc) == false) {
            // No value for this doc, nothing to count.
            return;
        }

        final long rounded = currentRounding.round(values.longValue());
        final long bucketIndex = bucketOrdsSupplier.get().add(owningBucketOrd, rounded);
        if (bucketIndex < 0) {
            aggregator.collectExistingBucket(sub, doc, -1 - bucketIndex);
        } else {
            aggregator.collectBucket(sub, doc, bucketIndex);
            increaseRoundingIfNeeded.accept(owningBucketOrd, rounded);
        }
    }

    @Override
    public void collect(DocIdStream stream) throws IOException {
        // This will only be called if its the top agg
        collect(stream, 0);
    }

    /**
     * Collects every document of {@code stream} into the histogram for {@code owningBucketOrd},
     * consuming the stream one skip-list range at a time.
     *
     * @param stream          doc IDs to collect
     * @param owningBucketOrd ordinal of the owning bucket
     */
    @Override
    public void collect(DocIdStream stream, long owningBucketOrd) throws IOException {
        // Same staleness check as collect(int, long): never reuse a range that was cached for
        // another owning bucket ordinal or another rounding.
        resetCacheIfStale(owningBucketOrd);

        for (;;) {
            int upToExclusive = upToInclusive + 1;
            if (upToExclusive < 0) { // overflow
                upToExclusive = Integer.MAX_VALUE;
            }

            if (upToSameBucket) {
                if (sub == NO_OP_COLLECTOR) {
                    // stream.count maybe faster when we don't need to handle sub-aggs
                    final long count = stream.count(upToExclusive);
                    aggregator.incrementBucketDocCount(upToBucketIndex, count);
                } else {
                    // Single-element array so the lambda can update the counter.
                    final int[] count = { 0 };
                    stream.forEach(upToExclusive, doc -> {
                        sub.collect(doc, upToBucketIndex);
                        count[0]++;
                    });
                    aggregator.incrementBucketDocCount(upToBucketIndex, count[0]);
                }
            } else {
                stream.forEach(upToExclusive, doc -> collect(doc, owningBucketOrd));
            }

            if (stream.mayHaveRemaining() == false) {
                break;
            }
            advanceSkipper(upToExclusive, owningBucketOrd);
        }
    }

    /**
     * Call back for auto date histogram
     *
     * @opensearch.internal
     */
    @FunctionalInterface
    public interface IncreaseRoundingIfNeeded {
        /**
         * Invoked after a new bucket has been created.
         *
         * @param owningBucket ordinal of the owning bucket the new bucket was added under
         * @param rounded      rounded key of the new bucket
         */
        void accept(long owningBucket, long rounded);
    }
}
Loading
Loading