Skip to content

Commit c2a8c77

Browse files
committed
Deal with subset collisions
1 parent 3ce5b0b commit c2a8c77

File tree

1 file changed

+59
-15
lines changed

1 file changed

+59
-15
lines changed

src/main/java/dev/morling/onebrc/CalculateAverage_chrisbellew.java

+59-15
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,19 @@ static final class ThreadProcessor implements Runnable {
225225
*/
226226
private final long numbersAddress;
227227

228+
/**
229+
* The main memory address at the beginning of the name length table slots.
230+
*/
231+
private final long lengthsAddress;
232+
228233
/**
229234
* The SIMD vectors associated with each slot in the hash table. The
230235
* content of a given slot in a hash table is a lookup into this array.
231236
* The intent of having this array as an extra lookup is to allow N
232237
* vectors per slot while having fixed size slots.
233238
*/
234239
private ByteVector[] vectors = new ByteVector[200000];
240+
private String[] cityNames = new String[NUM_SLOTS];
235241

236242
/**
237243
* The next available index in the vectors array.
@@ -261,6 +267,7 @@ public ThreadProcessor(long start, long end, int bufferSize) {
261267
unsafe.putShort(slotsAddress + i * 2, (short) 0);
262268
}
263269
numbersAddress = unsafe.allocateMemory(NUM_SLOTS * 16);
270+
lengthsAddress = unsafe.allocateMemory(NUM_SLOTS);
264271
}
265272

266273
public final void run() {
@@ -479,9 +486,22 @@ private final void record(byte[] buffer, int nameStart, int nameEnd, int measure
479486
*/
480487
var vectorOffset = unsafe.getShort(slotsAddress + slotIndex * 2);
481488
while (vectorOffset != 0) {
482-
if (slotEquals(buffer, nameStart, vectorOffset, numVectors, MASKS[remainder], slotIndex)) {
483-
updateSlot(slotIndex, measurement);
484-
break;
489+
490+
/**
491+
* Check that the set of vectors in the slot matches the city name.
492+
*/
493+
if (slotEquals(buffer, nameStart, vectorOffset, numVectors, remainder, slotIndex)) {
494+
495+
/**
496+
* Check that the lengths of the slot name and the city name match. This
497+
* check is needed because the vector equality check can give
498+
* false positives if one city name starts with another.
499+
*/
500+
byte slotNameLength = unsafe.getByte(lengthsAddress + slotIndex);
501+
if (slotNameLength == nameLength) {
502+
updateSlot(slotIndex, measurement);
503+
break;
504+
}
485505
}
486506

487507
/**
@@ -497,20 +517,36 @@ private final void record(byte[] buffer, int nameStart, int nameEnd, int measure
497517
* city name and measurement.
498518
*/
499519
if (vectorOffset == 0) {
520+
/**
521+
* Record the city name length for this slot.
522+
*/
523+
unsafe.putByte(lengthsAddress + slotIndex, (byte) nameLength);
524+
525+
/**
526+
* Record the index in the vectors array where this slot's set of vectors starts.
527+
*/
500528
unsafe.putShort(slotsAddress + slotIndex * 2, nextVectorIndex);
529+
530+
/**
531+
* Records the vectors for the city name.
532+
*/
501533
for (int v = 0; v < numVectors; v++) {
502534
vectors[nextVectorIndex] = ByteVector.fromArray(SPECIES, buffer, nameStart + v * SPECIES.length());
503535
nextVectorIndex++;
504536
}
505537

506538
cityVectorLookup.put(new String(buffer, nameStart, nameLength), slotIndex);
507539

508-
// Min, max, count, sum
540+
/**
541+
* Min, max, count, sum
542+
*/
509543
var numbersIndex = getNumbersIndex(slotIndex);
510544
unsafe.putInt(numbersIndex, measurement);
511545
unsafe.putInt(numbersIndex + 4, measurement);
512546
unsafe.putInt(numbersIndex + 8, 1);
513547
unsafe.putInt(numbersIndex + 12, measurement);
548+
549+
cityNames[slotIndex] = new String(buffer, nameStart, nameLength);
514550
}
515551
}
516552

@@ -587,13 +623,21 @@ private final void updateSlot(int slotIndex, int measurement) {
587623
* length, so the last vector in the slot will be a partial vector. The
588624
* masks are used to ignore the unused bytes in the last vector.
589625
*/
590-
private final boolean slotEquals(byte[] buffer, int nameStart, int vectorOffset, int numVectors, VectorMask<Byte> lastVectorMask, int slotIndex) {
626+
private final boolean slotEquals(byte[] buffer, int nameStart, int vectorOffset, int numVectors, int remainder, int slotIndex) {
591627
for (int v = 0; v < numVectors; v++) {
592628
var nameVector = ByteVector.fromArray(SPECIES, buffer, nameStart + v * SPECIES.length());
593629
var slotVector = vectors[vectorOffset + v];
594630
if (v == numVectors - 1) {
595-
if (!slotVector.compare(VectorOperators.EQ, nameVector, lastVectorMask).equals(lastVectorMask)) {
596-
return false;
631+
if (remainder == 0) {
632+
if (!slotVector.eq(nameVector).allTrue()) {
633+
return false;
634+
}
635+
}
636+
else {
637+
var mask = MASKS[remainder - 1];
638+
if (!slotVector.compare(VectorOperators.EQ, nameVector, mask).equals(mask)) {
639+
return false;
640+
}
597641
}
598642
break;
599643
}
@@ -644,14 +688,14 @@ public void collectResults(TreeMap<String, CityResult> results) {
644688
* the last vector of the source line and a given slot in the hash table.
645689
*/
646690
private static final VectorMask<Byte>[] generateMasks(VectorSpecies<Byte> species) {
647-
VectorMask<Byte>[] masks = new VectorMask[species.length() + 1];
648-
for (int i = 0; i < species.length() + 1; i++) {
649-
boolean[] maskBooleans = new boolean[species.length()];
650-
for (int j = 0; j < species.length(); j++) {
651-
maskBooleans[j] = j < i;
652-
}
653-
masks[i] = VectorMask.fromArray(species, maskBooleans, 0);
654-
}
691+
VectorMask<Byte>[] masks = new VectorMask[species.length() - 1];
692+
masks[0] = VectorMask.fromArray(species, new boolean[]{ true, false, false, false, false, false, false, false }, 0);
693+
masks[1] = VectorMask.fromArray(species, new boolean[]{ true, true, false, false, false, false, false, false }, 0);
694+
masks[2] = VectorMask.fromArray(species, new boolean[]{ true, true, true, false, false, false, false, false }, 0);
695+
masks[3] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, false, false, false, false }, 0);
696+
masks[4] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, false, false, false }, 0);
697+
masks[5] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, true, false, false }, 0);
698+
masks[6] = VectorMask.fromArray(species, new boolean[]{ true, true, true, true, true, true, true, false }, 0);
655699
return masks;
656700
}
657701

0 commit comments

Comments
 (0)