Skip to content

Commit 6fbcceb

Browse files
authored
fix: tpc-h (IGinX-THU#519)
* test: remove temp tables and udf in tpc-h test
* fix: tpc-h
1 parent 8371051 commit 6fbcceb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+556
-642
lines changed

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/PhysicalFunctions.java

+19-16
Original file line number | Diff line number | Diff line change
@@ -83,25 +83,31 @@ public static IntVector filter(BufferAllocator allocator, BitVector bitmap) {
8383

8484
public static <OUTPUT extends FieldVector> void takeTo(
8585
BaseIntVector selection, OUTPUT output, OUTPUT input) {
86-
if (selection.getField().isNullable()) {
87-
throw new IllegalArgumentException("Selection vector must be not nullable");
88-
}
89-
9086
int outputOffset = output.getValueCount();
9187
int outputRowCount = outputOffset + selection.getValueCount();
88+
89+
boolean useSetSafe;
9290
if (output instanceof BaseFixedWidthVector) {
93-
output.setValueCount(outputRowCount);
94-
for (int selectionIndex = 0; selectionIndex < selection.getValueCount(); selectionIndex++) {
95-
int outputIndex = outputOffset + selectionIndex;
96-
output.copyFrom((int) selection.getValueAsLong(selectionIndex), outputIndex, input);
97-
}
91+
((BaseFixedWidthVector) output).allocateNew(outputRowCount);
92+
useSetSafe = false;
9893
} else {
99-
for (int selectionIndex = 0; selectionIndex < selection.getValueCount(); selectionIndex++) {
100-
int outputIndex = outputOffset + selectionIndex;
101-
output.copyFromSafe((int) selection.getValueAsLong(selectionIndex), outputIndex, input);
94+
output.setInitialCapacity(outputRowCount);
95+
useSetSafe = true;
96+
}
97+
boolean isNullable = input.getField().isNullable();
98+
for (int selectionIndex = 0; selectionIndex < selection.getValueCount(); selectionIndex++) {
99+
int outputIndex = outputOffset + selectionIndex;
100+
if (isNullable && selection.isNull(selectionIndex)) {
101+
output.setNull(outputIndex);
102+
} else {
103+
if (useSetSafe) {
104+
output.copyFromSafe((int) selection.getValueAsLong(selectionIndex), outputIndex, input);
105+
} else {
106+
output.copyFrom((int) selection.getValueAsLong(selectionIndex), outputIndex, input);
107+
}
102108
}
103-
output.setValueCount(outputRowCount);
104109
}
110+
output.setValueCount(outputRowCount);
105111
}
106112

107113
public static void takeTo(
@@ -119,9 +125,6 @@ public static void takeTo(
119125
@SuppressWarnings("unchecked")
120126
public static <OUTPUT extends FieldVector> OUTPUT take(
121127
BufferAllocator allocator, BaseIntVector selection, OUTPUT input) {
122-
if (selection.getField().isNullable()) {
123-
throw new IllegalArgumentException("Selection vector must be not nullable");
124-
}
125128
TransferPair transferPair = input.getTransferPair(allocator);
126129
OUTPUT result = (OUTPUT) transferPair.getTo();
127130
result.setInitialCapacity(selection.getValueCount());

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/join/JoinHashMap.java

+74-55
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,6 @@
3232
import javax.annotation.Nullable;
3333
import javax.annotation.WillClose;
3434
import javax.annotation.WillCloseWhenClosed;
35-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
36-
import org.apache.arrow.algorithm.sort.IndexSorter;
37-
import org.apache.arrow.algorithm.sort.StableVectorComparator;
3835
import org.apache.arrow.memory.BufferAllocator;
3936
import org.apache.arrow.util.Preconditions;
4037
import org.apache.arrow.vector.*;
@@ -106,7 +103,7 @@ public void probe(VectorSchemaRoot probeSideBatch) throws ComputeException {
106103
IntVector buildSideCandidateIndices = buildSideCandidateIndicesBuilder.build();
107104
IntVector proSideCandidateIndices = probeSideCandidateIndicesBuilder.build()) {
108105
boolean[] probeSideMatched = new boolean[probeSideBatch.getRowCount()];
109-
output(
106+
outputMatchedAndUnmatched(
110107
outputDictionaryProvider,
111108
buildSideCandidateIndices,
112109
proSideCandidateIndices,
@@ -143,12 +140,12 @@ public void flush() throws ComputeException {
143140
IntVector buildSideIndices = buildSideIndicesBuilder.build(buildSideUnmatchedCount);
144141
IntVector probeSideIndices = probeSideIndicesBuilder.build(buildSideUnmatchedCount);
145142
BitVector mark = markBuilder.build(buildSideUnmatchedCount)) {
146-
output(dictionary, buildSideIndices, probeSideIndices, mark);
143+
output(dictionary, buildSideIndices, probeSideIndices, mark, 0);
147144
}
148145
}
149146
}
150147

151-
private void output(
148+
private void outputMatchedAndUnmatched(
152149
ArrayDictionaryProvider dictionary,
153150
IntVector buildSideCandidateIndices,
154151
IntVector proSideCandidateIndices,
@@ -160,32 +157,37 @@ private void output(
160157
allocator, "buildSideIndices", buildSideCandidateIndices.getValueCount());
161158
SelectionBuilder probeSideIndicesBuilder =
162159
new SelectionBuilder(
163-
allocator, "probeSideIndices", proSideCandidateIndices.getValueCount());
164-
MarkBuilder markBuilder = getMarkBuilder(probeSideMatched.length)) {
160+
allocator, "probeSideIndices", proSideCandidateIndices.getValueCount())) {
165161

162+
int matchedCount = 0;
166163
try (VectorSchemaRoot candidate =
167164
getDictionaryEncodedBatch(
168165
buildSideCandidateIndices, proSideCandidateIndices, dictionary);
169166
BaseIntVector indicesSelection = matcher.filter(allocator, dictionary, candidate, null)) {
170-
outputMatched(
171-
buildSideIndicesBuilder,
172-
probeSideIndicesBuilder,
173-
markBuilder,
174-
buildSideCandidateIndices,
175-
proSideCandidateIndices,
176-
indicesSelection,
177-
probeSideMatched);
167+
matchedCount =
168+
outputMatched(
169+
buildSideIndicesBuilder,
170+
probeSideIndicesBuilder,
171+
buildSideCandidateIndices,
172+
proSideCandidateIndices,
173+
indicesSelection,
174+
probeSideMatched);
178175
}
179176

177+
int unmatchedCount = 0;
180178
if (joinOption.isToOutputProbeSideUnmatched()) {
181-
outputProbeSideUnmatched(probeSideIndicesBuilder, markBuilder, probeSideMatched);
179+
unmatchedCount = outputProbeSideUnmatched(probeSideIndicesBuilder, probeSideMatched);
182180
}
183181

184-
try (IntVector probeSideIndices = probeSideIndicesBuilder.build();
185-
IntVector buildSideIndices =
186-
buildSideIndicesBuilder.build(probeSideIndices.getValueCount());
187-
BitVector mark = markBuilder.build(probeSideIndices.getValueCount())) {
188-
output(dictionary, buildSideIndices, probeSideIndices, mark);
182+
try (MarkBuilder markBuilder = getMarkBuilder(matchedCount + unmatchedCount)) {
183+
markBuilder.appendTrue(matchedCount);
184+
markBuilder.appendFalse(unmatchedCount);
185+
try (IntVector probeSideIndices = probeSideIndicesBuilder.build();
186+
IntVector buildSideIndices =
187+
buildSideIndicesBuilder.build(probeSideIndices.getValueCount());
188+
BitVector mark = markBuilder.build(probeSideIndices.getValueCount())) {
189+
output(dictionary, buildSideIndices, probeSideIndices, mark, unmatchedCount);
190+
}
189191
}
190192
}
191193
}
@@ -198,10 +200,9 @@ private MarkBuilder getMarkBuilder(int capacity) {
198200
}
199201
}
200202

201-
private void outputMatched(
203+
private int outputMatched(
202204
SelectionBuilder buildSideIndicesBuilder,
203205
SelectionBuilder probeSideIndicesBuilder,
204-
MarkBuilder markBuilder,
205206
IntVector buildSideCandidateIndices,
206207
IntVector proSideCandidateIndices,
207208
@Nullable BaseIntVector indicesSelection,
@@ -210,20 +211,18 @@ private void outputMatched(
210211
Preconditions.checkState(
211212
buildSideCandidateIndices.getValueCount() == proSideCandidateIndices.getValueCount());
212213
if (indicesSelection == null) {
213-
outputMatched(
214+
return outputMatched(
214215
buildSideIndicesBuilder,
215216
probeSideIndicesBuilder,
216-
markBuilder,
217217
buildSideCandidateIndices,
218218
proSideCandidateIndices,
219219
probeSideMatched,
220220
proSideCandidateIndices.getValueCount(),
221221
i -> i);
222222
} else {
223-
outputMatched(
223+
return outputMatched(
224224
buildSideIndicesBuilder,
225225
probeSideIndicesBuilder,
226-
markBuilder,
227226
buildSideCandidateIndices,
228227
proSideCandidateIndices,
229228
probeSideMatched,
@@ -232,10 +231,9 @@ private void outputMatched(
232231
}
233232
}
234233

235-
private void outputMatched(
234+
private int outputMatched(
236235
SelectionBuilder buildSideIndicesBuilder,
237236
SelectionBuilder probeSideIndicesBuilder,
238-
MarkBuilder markBuilder,
239237
IntVector buildSideCandidateIndices,
240238
IntVector proSideCandidateIndices,
241239
boolean[] probeSideMatched,
@@ -266,21 +264,19 @@ private void outputMatched(
266264
buildSideIndicesBuilder.append(buildSideMatchedIndex);
267265
probeSideIndicesBuilder.append(probeSideMatchedIndex);
268266
}
269-
markBuilder.appendTrue(probeSideMatchedCount);
267+
return probeSideMatchedCount;
270268
}
271269

272-
private void outputProbeSideUnmatched(
273-
SelectionBuilder probeSideIndicesBuilder,
274-
MarkBuilder markBuilder,
275-
boolean[] probeSideMatched) {
270+
private int outputProbeSideUnmatched(
271+
SelectionBuilder probeSideIndicesBuilder, boolean[] probeSideMatched) {
276272
int probeSideUnmatchedCount = 0;
277273
for (int probeSideIndex = 0; probeSideIndex < probeSideMatched.length; probeSideIndex++) {
278274
if (!probeSideMatched[probeSideIndex]) {
279275
probeSideUnmatchedCount++;
280276
probeSideIndicesBuilder.append(probeSideIndex);
281277
}
282278
}
283-
markBuilder.appendFalse(probeSideUnmatchedCount);
279+
return probeSideUnmatchedCount;
284280
}
285281

286282
private VectorSchemaRoot getDictionaryEncodedBatch(
@@ -306,7 +302,8 @@ private void output(
306302
ArrayDictionaryProvider dictionaryProvider,
307303
BaseIntVector buildSideIndices,
308304
BaseIntVector probeSideIndices,
309-
@Nullable BitVector mark)
305+
@Nullable BitVector mark,
306+
int unmatchedCount)
310307
throws ComputeException {
311308
Preconditions.checkArgument(
312309
buildSideIndices.getValueCount() == probeSideIndices.getValueCount());
@@ -335,7 +332,7 @@ private void output(
335332
VectorSchemaRoot output =
336333
ScalarExpressions.evaluate(
337334
allocator, dictionaryProvider, result, null, outputExpressions);
338-
BaseIntVector selection = getSelection(probeSideIndices)) {
335+
BaseIntVector selection = getSelection(probeSideIndices, unmatchedCount)) {
339336
resultConsumer.consume(
340337
dictionaryProvider.slice(allocator),
341338
VectorSchemaRoots.transfer(allocator, output),
@@ -344,33 +341,55 @@ private void output(
344341
}
345342

346343
@Nullable
347-
private BaseIntVector getSelection(BaseIntVector probeSideIndices) {
348-
if (!joinOption.isToOutputProbeSideUnmatched() || !joinOption.isOrderByProbeSideOrdinal()) {
344+
private BaseIntVector getSelection(BaseIntVector probeSideIndices, int unmatchedCount) {
345+
int total = probeSideIndices.getValueCount();
346+
int matchedCount = total - unmatchedCount;
347+
if (matchedCount == 0 || unmatchedCount == 0 || !joinOption.isOrderByProbeSideOrdinal()) {
349348
return null;
350349
}
351-
if (isInOrder(probeSideIndices)) {
350+
351+
if (isInOrder(probeSideIndices, matchedCount - 1, matchedCount)) {
352352
return null;
353353
}
354354

355-
IntVector selection = new IntVector("selection", allocator);
356-
selection.allocateNew(probeSideIndices.getValueCount());
357-
selection.setValueCount(probeSideIndices.getValueCount());
358-
new IndexSorter<>()
359-
.sort(
360-
probeSideIndices,
361-
selection,
362-
new StableVectorComparator<>(
363-
DefaultVectorComparators.createDefaultComparator(probeSideIndices)));
364-
return selection;
355+
try (SelectionBuilder selectionBuilder = new SelectionBuilder(allocator, "selection", total)) {
356+
// merge from [0,matchedCount) and from [matchedCount,probeSideIndices.getValueCount())
357+
int leftCursor = 0;
358+
int rightCursor = matchedCount;
359+
while (true) {
360+
if (leftCursor < matchedCount) {
361+
if (rightCursor < total) {
362+
if (isInOrder(probeSideIndices, leftCursor, rightCursor)) {
363+
selectionBuilder.append(leftCursor++);
364+
} else {
365+
selectionBuilder.append(rightCursor++);
366+
}
367+
} else {
368+
selectionBuilder.append(leftCursor++);
369+
}
370+
} else {
371+
if (rightCursor < total) {
372+
selectionBuilder.append(rightCursor++);
373+
} else {
374+
break;
375+
}
376+
}
377+
}
378+
return selectionBuilder.build();
379+
}
365380
}
366381

367-
private static boolean isInOrder(BaseIntVector vector) {
368-
for (int i = 1; i < vector.getValueCount(); i++) {
369-
if (vector.getValueAsLong(i) < vector.getValueAsLong((i - 1))) {
382+
private static boolean isInOrder(BaseIntVector vector, int leftIndex, int rightIndex) {
383+
// null is biggest
384+
if (vector.getField().isNullable()) {
385+
if (vector.isNull(rightIndex)) {
386+
return true;
387+
}
388+
if (vector.isNull(leftIndex)) {
370389
return false;
371390
}
372391
}
373-
return true;
392+
return vector.getValueAsLong(leftIndex) <= vector.getValueAsLong(rightIndex);
374393
}
375394

376395
public static class Builder implements AutoCloseable {

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/join/JoinOption.java

+2-6
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@ public enum JoinOption {
2727
LEFT(true, false),
2828
RIGHT(false, true),
2929
FULL(true, true),
30-
MARK(true, false, true),
31-
SINGLE(false, false, false);
30+
MARK(false, true, false, true, false, true),
31+
SINGLE(false, true, true, false, false, false);
3232

3333
private final boolean toOutputProbeSideUnmatched;
3434
private final boolean toOutputBuildSideUnmatched;
@@ -41,10 +41,6 @@ public enum JoinOption {
4141
this(toOutputBuildSideUnmatched, toOutputProbeSideUnmatched, false, true, true, false);
4242
}
4343

44-
JoinOption(boolean allowedToMatchMultiple, boolean toOutputAllMatched, boolean toOutputMark) {
45-
this(false, true, true, allowedToMatchMultiple, toOutputAllMatched, toOutputMark);
46-
}
47-
4844
JoinOption(
4945
boolean toOutputBuildSideUnmatched,
5046
boolean toOutputProbeSideUnmatched,

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/scalar/ScalarFunction.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ default OUTPUT invoke(
7070
}
7171
try (VectorSchemaRoot flattened =
7272
VectorSchemaRoots.flatten(allocator, dictionaryProvider, input, selection)) {
73-
return invoke(allocator, selection, flattened);
73+
return invoke(allocator, flattened);
7474
}
7575
}
7676
}

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/scalar/convert/cast/AbstractScalarCast.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ public OUTPUT evaluate(
6262
return evaluate(allocator, input);
6363
}
6464
try (FieldVector selected = PhysicalFunctions.take(allocator, selection, input)) {
65-
return evaluate(allocator, selection);
65+
return evaluate(allocator, selected);
6666
}
6767
}
6868

core/src/main/java/cn/edu/tsinghua/iginx/engine/physical/memory/execute/compute/scalar/register/Callee.java

-42
This file was deleted.

0 commit comments

Comments
 (0)